From a440943e68cd1b5a853a6f60865967b7cc2539eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Sep 2021 08:59:58 +0200 Subject: unicode: remove the charset field from struct unicode_map It is hardcoded and only used for a f2fs sysfs file where it can be hardcoded just as easily. Signed-off-by: Christoph Hellwig Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Gabriel Krisman Bertazi --- include/linux/unicode.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 74484d44c755..6a392cd9f076 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -6,7 +6,6 @@ #include struct unicode_map { - const char *charset; int version; }; -- cgit v1.2.3 From f3a9c82396006a5664f6e398d6928799d29de76e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Sep 2021 08:59:59 +0200 Subject: unicode: mark the version field in struct unicode_map unsigned unicode version tripplets are always unsigned. Signed-off-by: Christoph Hellwig Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Gabriel Krisman Bertazi --- include/linux/unicode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 6a392cd9f076..0744f81c4b5f 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -6,7 +6,7 @@ #include struct unicode_map { - int version; + unsigned int version; }; int utf8_validate(const struct unicode_map *um, const struct qstr *str); -- cgit v1.2.3 From 49bd03cc7e95cb78420305ca2f5ef67497b6fa80 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Sep 2021 09:00:00 +0200 Subject: unicode: pass a UNICODE_AGE() tripple to utf8_load Don't bother with pointless string parsing when the caller can just pass the version in the format that the core expects. Also remove the fallback to the latest version that none of the callers actually uses. Signed-off-by: Christoph Hellwig Signed-off-by: Gabriel Krisman Bertazi --- include/linux/unicode.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 0744f81c4b5f..77bb915fd1f0 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -5,6 +5,29 @@ #include #include +#define UNICODE_MAJ_SHIFT 16 +#define UNICODE_MIN_SHIFT 8 + +#define UNICODE_AGE(MAJ, MIN, REV) \ + (((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \ + ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \ + ((unsigned int)(REV))) + +static inline u8 unicode_major(unsigned int age) +{ + return (age >> UNICODE_MAJ_SHIFT) & 0xff; +} + +static inline u8 unicode_minor(unsigned int age) +{ + return (age >> UNICODE_MIN_SHIFT) & 0xff; +} + +static inline u8 unicode_rev(unsigned int age) +{ + return age & 0xff; +} + struct unicode_map { unsigned int version; }; @@ -29,7 +52,7 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str, int utf8_casefold_hash(const struct unicode_map *um, const void *salt, struct qstr *str); -struct unicode_map *utf8_load(const char *version); +struct unicode_map *utf8_load(unsigned int version); void utf8_unload(struct unicode_map *um); #endif /* _LINUX_UNICODE_H */ -- cgit v1.2.3 From 6ca99ce756c27852d1ea1e555045de1c920f30ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Sep 2021 09:00:04 +0200 Subject: unicode: cache the normalization tables in struct unicode_map Instead of repeatedly looking up the version add pointers to the NFD and NFD+CF tables to struct unicode_map, and pass a unicode_map plus index to the functions using the normalization tables. Signed-off-by: Christoph Hellwig Signed-off-by: Gabriel Krisman Bertazi --- include/linux/unicode.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 77bb915fd1f0..526ca8b8391a 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -5,6 +5,8 @@ #include #include +struct utf8data; + #define UNICODE_MAJ_SHIFT 16 #define UNICODE_MIN_SHIFT 8 @@ -28,8 +30,25 @@ static inline u8 unicode_rev(unsigned int age) return age & 0xff; } +/* + * Two normalization forms are supported: + * 1) NFDI + * - Apply unicode normalization form NFD. + * - Remove any Default_Ignorable_Code_Point. + * 2) NFDICF + * - Apply unicode normalization form NFD. + * - Remove any Default_Ignorable_Code_Point. + * - Apply a full casefold (C + F). + */ +enum utf8_normalization { + UTF8_NFDI = 0, + UTF8_NFDICF, + UTF8_NMAX, +}; + struct unicode_map { unsigned int version; + const struct utf8data *ntab[UTF8_NMAX]; }; int utf8_validate(const struct unicode_map *um, const struct qstr *str); -- cgit v1.2.3 From 2b3d047870120bcd46d7cc257d19ff49328fd585 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Sep 2021 09:00:05 +0200 Subject: unicode: Add utf8-data module utf8data.h contains a large database table which is an auto-generated decodification trie for the unicode normalization functions. Allow building it into a separate module. Based on a patch from Shreeya Patel . Signed-off-by: Christoph Hellwig Signed-off-by: Gabriel Krisman Bertazi --- include/linux/unicode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 526ca8b8391a..4d39e6e11a95 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -6,6 +6,7 @@ #include struct utf8data; +struct utf8data_table; #define UNICODE_MAJ_SHIFT 16 #define UNICODE_MIN_SHIFT 8 @@ -49,6 +50,7 @@ enum utf8_normalization { struct unicode_map { unsigned int version; const struct utf8data *ntab[UTF8_NMAX]; + const struct utf8data_table *tables; }; int utf8_validate(const struct unicode_map *um, const struct qstr *str); -- cgit v1.2.3 From 107fe904302092c683cf5462b4af3cb3cfa40998 Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Tue, 5 Oct 2021 22:23:13 +0200 Subject: drm/connector: Add support for privacy-screen properties (v4) Add support for generic electronic privacy screen properties, that can be added by systems that have an integrated EPS. Changes in v2 (Hans de Goede) - Create 2 properties, "privacy-screen sw-state" and "privacy-screen hw-state", to deal with devices where the OS might be locked out of making state changes - Write kerneldoc explaining how the 2 properties work together, what happens when changes to the state are made outside of the DRM code's control, etc. Changes in v3 (Hans de Goede) - Some small tweaks to the kerneldoc describing the 2 properties Changes in v4 (Hans de Goede) - Change the "Enabled, locked" and "Disabled, locked" hw-state enum value names to "Enabled-locked" and "Disabled-locked". The xrandr command shows all possible enum values separated by commas in its output, so having a comma in an enum name is not a good idea. - Do not add a privacy_screen_hw_state member to drm_connector_state since this property is immutable its value must be directly stored in the obj->properties->values array Signed-off-by: Rajat Jain Acked-by: Pekka Paalanen Reviewed-by: Mario Limonciello Reviewed-by: Emil Velikov Reviewed-by: Lyude Paul Co-developed-by: Hans de Goede Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-2-hdegoede@redhat.com --- include/drm/drm_connector.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 79fa34e5ccdb..1acbcf0626ce 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -320,6 +320,30 @@ struct drm_monitor_range_info { u8 max_vfreq; }; +/** + * enum drm_privacy_screen_status - privacy screen status + * + * This enum is used to track and control the state of the integrated privacy + * screen present on some display panels, via the "privacy-screen sw-state" + * and "privacy-screen hw-state" properties. Note the _LOCKED enum values + * are only valid for the "privacy-screen hw-state" property. + * + * @PRIVACY_SCREEN_DISABLED: + * The privacy-screen on the panel is disabled + * @PRIVACY_SCREEN_ENABLED: + * The privacy-screen on the panel is enabled + * @PRIVACY_SCREEN_DISABLED_LOCKED: + * The privacy-screen on the panel is disabled and locked (cannot be changed) + * @PRIVACY_SCREEN_ENABLED_LOCKED: + * The privacy-screen on the panel is enabled and locked (cannot be changed) + */ +enum drm_privacy_screen_status { + PRIVACY_SCREEN_DISABLED = 0, + PRIVACY_SCREEN_ENABLED, + PRIVACY_SCREEN_DISABLED_LOCKED, + PRIVACY_SCREEN_ENABLED_LOCKED, +}; + /* * This is a consolidated colorimetry list supported by HDMI and * DP protocol standard. The respective connectors will register @@ -781,6 +805,12 @@ struct drm_connector_state { */ u8 max_bpc; + /** + * @privacy_screen_sw_state: See :ref:`Standard Connector + * Properties` + */ + enum drm_privacy_screen_status privacy_screen_sw_state; + /** * @hdr_output_metadata: * DRM blob property for HDR output metadata @@ -1409,6 +1439,18 @@ struct drm_connector { */ struct drm_property *max_bpc_property; + /** + * @privacy_screen_sw_state_property: Optional atomic property for the + * connector to control the integrated privacy screen. + */ + struct drm_property *privacy_screen_sw_state_property; + + /** + * @privacy_screen_hw_state_property: Optional atomic property for the + * connector to report the actual integrated privacy screen state. + */ + struct drm_property *privacy_screen_hw_state_property; + #define DRM_CONNECTOR_POLL_HPD (1 << 0) #define DRM_CONNECTOR_POLL_CONNECT (1 << 1) #define DRM_CONNECTOR_POLL_DISCONNECT (1 << 2) @@ -1732,6 +1774,8 @@ int drm_connector_set_panel_orientation_with_quirk( int width, int height); int drm_connector_attach_max_bpc_property(struct drm_connector *connector, int min, int max); +void drm_connector_create_privacy_screen_properties(struct drm_connector *conn); +void drm_connector_attach_privacy_screen_properties(struct drm_connector *conn); /** * struct drm_tile_group - Tile group metadata -- cgit v1.2.3 From a1a98689301b9af0313e4c1ba44558e8b67ff76e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 5 Oct 2021 22:23:14 +0200 Subject: drm: Add privacy-screen class (v4) On some new laptops the LCD panel has a builtin electronic privacy-screen. We want to export this functionality as a property on the drm connector object. But often this functionality is not exposed on the GPU but on some other (ACPI) device. This commit adds a privacy-screen class allowing the driver for these other devices to register themselves as a privacy-screen provider; and allowing the drm/kms code to get a privacy-screen provider associated with a specific GPU/connector combo. Changes in v2: - Make CONFIG_DRM_PRIVACY_SCREEN a bool which controls if the drm_privacy code gets built as part of the main drm module rather then making it a tristate which builds its own module. - Add a #if IS_ENABLED(CONFIG_DRM_PRIVACY_SCREEN) check to drm_privacy_screen_consumer.h and define stubs when the check fails. Together these 2 changes fix several dependency issues. - Remove module related code now that this is part of the main drm.ko - Use drm_class as class for the privacy-screen devices instead of adding a separate class for this Changes in v3: - Make the static inline drm_privacy_screen_get_state() stub set sw_state and hw_state to PRIVACY_SCREEN_DISABLED to squelch an uninitialized variable warning when CONFIG_DRM_PRIVICAY_SCREEN is not set Changes in v4: - Make drm_privacy_screen_set_sw_state() skip calling out to the hw if hw_state == new_sw_state Reviewed-by: Emil Velikov Reviewed-by: Lyude Paul Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-3-hdegoede@redhat.com --- include/drm/drm_privacy_screen_consumer.h | 50 +++++++++++++++++++ include/drm/drm_privacy_screen_driver.h | 80 +++++++++++++++++++++++++++++++ include/drm/drm_privacy_screen_machine.h | 41 ++++++++++++++++ 3 files changed, 171 insertions(+) create mode 100644 include/drm/drm_privacy_screen_consumer.h create mode 100644 include/drm/drm_privacy_screen_driver.h create mode 100644 include/drm/drm_privacy_screen_machine.h (limited to 'include') diff --git a/include/drm/drm_privacy_screen_consumer.h b/include/drm/drm_privacy_screen_consumer.h new file mode 100644 index 000000000000..0cbd23b0453d --- /dev/null +++ b/include/drm/drm_privacy_screen_consumer.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2020 Red Hat, Inc. + * + * Authors: + * Hans de Goede + */ + +#ifndef __DRM_PRIVACY_SCREEN_CONSUMER_H__ +#define __DRM_PRIVACY_SCREEN_CONSUMER_H__ + +#include +#include + +struct drm_privacy_screen; + +#if IS_ENABLED(CONFIG_DRM_PRIVACY_SCREEN) +struct drm_privacy_screen *drm_privacy_screen_get(struct device *dev, + const char *con_id); +void drm_privacy_screen_put(struct drm_privacy_screen *priv); + +int drm_privacy_screen_set_sw_state(struct drm_privacy_screen *priv, + enum drm_privacy_screen_status sw_state); +void drm_privacy_screen_get_state(struct drm_privacy_screen *priv, + enum drm_privacy_screen_status *sw_state_ret, + enum drm_privacy_screen_status *hw_state_ret); +#else +static inline struct drm_privacy_screen *drm_privacy_screen_get(struct device *dev, + const char *con_id) +{ + return ERR_PTR(-ENODEV); +} +static inline void drm_privacy_screen_put(struct drm_privacy_screen *priv) +{ +} +static inline int drm_privacy_screen_set_sw_state(struct drm_privacy_screen *priv, + enum drm_privacy_screen_status sw_state) +{ + return -ENODEV; +} +static inline void drm_privacy_screen_get_state(struct drm_privacy_screen *priv, + enum drm_privacy_screen_status *sw_state_ret, + enum drm_privacy_screen_status *hw_state_ret) +{ + *sw_state_ret = PRIVACY_SCREEN_DISABLED; + *hw_state_ret = PRIVACY_SCREEN_DISABLED; +} +#endif + +#endif diff --git a/include/drm/drm_privacy_screen_driver.h b/include/drm/drm_privacy_screen_driver.h new file mode 100644 index 000000000000..5187ae52eb03 --- /dev/null +++ b/include/drm/drm_privacy_screen_driver.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2020 Red Hat, Inc. + * + * Authors: + * Hans de Goede + */ + +#ifndef __DRM_PRIVACY_SCREEN_DRIVER_H__ +#define __DRM_PRIVACY_SCREEN_DRIVER_H__ + +#include +#include +#include +#include + +struct drm_privacy_screen; + +/** + * struct drm_privacy_screen_ops - drm_privacy_screen operations + * + * Defines the operations which the privacy-screen class code may call. + * These functions should be implemented by the privacy-screen driver. + */ +struct drm_privacy_screen_ops { + /** + * @set_sw_state: Called to request a change of the privacy-screen + * state. The privacy-screen class code contains a check to avoid this + * getting called when the hw_state reports the state is locked. + * It is the driver's responsibility to update sw_state and hw_state. + * This is always called with the drm_privacy_screen's lock held. + */ + int (*set_sw_state)(struct drm_privacy_screen *priv, + enum drm_privacy_screen_status sw_state); + /** + * @get_hw_state: Called to request that the driver gets the current + * privacy-screen state from the hardware and then updates sw_state and + * hw_state accordingly. This will be called by the core just before + * the privacy-screen is registered in sysfs. + */ + void (*get_hw_state)(struct drm_privacy_screen *priv); +}; + +/** + * struct drm_privacy_screen - central privacy-screen structure + * + * Central privacy-screen structure, this contains the struct device used + * to register the screen in sysfs, the screen's state, ops, etc. + */ +struct drm_privacy_screen { + /** @dev: device used to register the privacy-screen in sysfs. */ + struct device dev; + /** @lock: mutex protection all fields in this struct. */ + struct mutex lock; + /** @list: privacy-screen devices list list-entry. */ + struct list_head list; + /** + * @ops: &struct drm_privacy_screen_ops for this privacy-screen. + * This is NULL if the driver has unregistered the privacy-screen. + */ + const struct drm_privacy_screen_ops *ops; + /** + * @sw_state: The privacy-screen's software state, see + * :ref:`Standard Connector Properties` + * for more info. + */ + enum drm_privacy_screen_status sw_state; + /** + * @hw_state: The privacy-screen's hardware state, see + * :ref:`Standard Connector Properties` + * for more info. + */ + enum drm_privacy_screen_status hw_state; +}; + +struct drm_privacy_screen *drm_privacy_screen_register( + struct device *parent, const struct drm_privacy_screen_ops *ops); +void drm_privacy_screen_unregister(struct drm_privacy_screen *priv); + +#endif diff --git a/include/drm/drm_privacy_screen_machine.h b/include/drm/drm_privacy_screen_machine.h new file mode 100644 index 000000000000..aaa0d38cce92 --- /dev/null +++ b/include/drm/drm_privacy_screen_machine.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2020 Red Hat, Inc. + * + * Authors: + * Hans de Goede + */ + +#ifndef __DRM_PRIVACY_SCREEN_MACHINE_H__ +#define __DRM_PRIVACY_SCREEN_MACHINE_H__ + +#include + +/** + * struct drm_privacy_screen_lookup - static privacy-screen lookup list entry + * + * Used for the static lookup-list for mapping privacy-screen consumer + * dev-connector pairs to a privacy-screen provider. + */ +struct drm_privacy_screen_lookup { + /** @list: Lookup list list-entry. */ + struct list_head list; + /** @dev_id: Consumer device name or NULL to match all devices. */ + const char *dev_id; + /** @con_id: Consumer connector name or NULL to match all connectors. */ + const char *con_id; + /** @provider: dev_name() of the privacy_screen provider. */ + const char *provider; +}; + +void drm_privacy_screen_lookup_add(struct drm_privacy_screen_lookup *lookup); +void drm_privacy_screen_lookup_remove(struct drm_privacy_screen_lookup *lookup); + +static inline void drm_privacy_screen_lookup_init(void) +{ +} +static inline void drm_privacy_screen_lookup_exit(void) +{ +} + +#endif -- cgit v1.2.3 From befe5404a00b3b1547c944738df4a9229909bdc9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 5 Oct 2021 22:23:15 +0200 Subject: drm/privacy-screen: Add X86 specific arch init code Add X86 specific arch init code, which fills the privacy-screen lookup table by checking for various vendor specific ACPI interfaces for controlling the privacy-screen. This initial version only checks for the Lenovo Thinkpad specific ACPI methods for privacy-screen control. Reviewed-by: Emil Velikov Reviewed-by: Lyude Paul Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-4-hdegoede@redhat.com --- include/drm/drm_privacy_screen_machine.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_privacy_screen_machine.h b/include/drm/drm_privacy_screen_machine.h index aaa0d38cce92..02e5371904d3 100644 --- a/include/drm/drm_privacy_screen_machine.h +++ b/include/drm/drm_privacy_screen_machine.h @@ -31,11 +31,16 @@ struct drm_privacy_screen_lookup { void drm_privacy_screen_lookup_add(struct drm_privacy_screen_lookup *lookup); void drm_privacy_screen_lookup_remove(struct drm_privacy_screen_lookup *lookup); +#if IS_ENABLED(CONFIG_DRM_PRIVACY_SCREEN) && IS_ENABLED(CONFIG_X86) +void drm_privacy_screen_lookup_init(void); +void drm_privacy_screen_lookup_exit(void); +#else static inline void drm_privacy_screen_lookup_init(void) { } static inline void drm_privacy_screen_lookup_exit(void) { } +#endif #endif -- cgit v1.2.3 From 8a12b170558aabb31cc98fda0da6a56b518cadaa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 5 Oct 2021 22:23:16 +0200 Subject: drm/privacy-screen: Add notifier support (v2) Add support for privacy-screen consumers to register a notifier to be notified of external (e.g. done by the hw itself on a hotkey press) state changes. Changes in v2: - Drop WARN_ON(mutex_is_locked(&priv->lock)) check in drm_privacy_screen_call_notifier_chain() it may be locked by another thread, which would lead to a false-positive triggering of the check Reviewed-by: Emil Velikov Reviewed-by: Lyude Paul Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-5-hdegoede@redhat.com --- include/drm/drm_privacy_screen_consumer.h | 15 +++++++++++++++ include/drm/drm_privacy_screen_driver.h | 4 ++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/drm/drm_privacy_screen_consumer.h b/include/drm/drm_privacy_screen_consumer.h index 0cbd23b0453d..7f66a90d15b7 100644 --- a/include/drm/drm_privacy_screen_consumer.h +++ b/include/drm/drm_privacy_screen_consumer.h @@ -24,6 +24,11 @@ int drm_privacy_screen_set_sw_state(struct drm_privacy_screen *priv, void drm_privacy_screen_get_state(struct drm_privacy_screen *priv, enum drm_privacy_screen_status *sw_state_ret, enum drm_privacy_screen_status *hw_state_ret); + +int drm_privacy_screen_register_notifier(struct drm_privacy_screen *priv, + struct notifier_block *nb); +int drm_privacy_screen_unregister_notifier(struct drm_privacy_screen *priv, + struct notifier_block *nb); #else static inline struct drm_privacy_screen *drm_privacy_screen_get(struct device *dev, const char *con_id) @@ -45,6 +50,16 @@ static inline void drm_privacy_screen_get_state(struct drm_privacy_screen *priv, *sw_state_ret = PRIVACY_SCREEN_DISABLED; *hw_state_ret = PRIVACY_SCREEN_DISABLED; } +static inline int drm_privacy_screen_register_notifier(struct drm_privacy_screen *priv, + struct notifier_block *nb) +{ + return -ENODEV; +} +static inline int drm_privacy_screen_unregister_notifier(struct drm_privacy_screen *priv, + struct notifier_block *nb) +{ + return -ENODEV; +} #endif #endif diff --git a/include/drm/drm_privacy_screen_driver.h b/include/drm/drm_privacy_screen_driver.h index 5187ae52eb03..24591b607675 100644 --- a/include/drm/drm_privacy_screen_driver.h +++ b/include/drm/drm_privacy_screen_driver.h @@ -54,6 +54,8 @@ struct drm_privacy_screen { struct mutex lock; /** @list: privacy-screen devices list list-entry. */ struct list_head list; + /** @notifier_head: privacy-screen notifier head. */ + struct blocking_notifier_head notifier_head; /** * @ops: &struct drm_privacy_screen_ops for this privacy-screen. * This is NULL if the driver has unregistered the privacy-screen. @@ -77,4 +79,6 @@ struct drm_privacy_screen *drm_privacy_screen_register( struct device *parent, const struct drm_privacy_screen_ops *ops); void drm_privacy_screen_unregister(struct drm_privacy_screen *priv); +void drm_privacy_screen_call_notifier_chain(struct drm_privacy_screen *priv); + #endif -- cgit v1.2.3 From 334f74ee85dc26a50c1a2b0da82517595191f92f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 5 Oct 2021 22:23:17 +0200 Subject: drm/connector: Add a drm_connector privacy-screen helper functions (v2) Add 2 drm_connector privacy-screen helper functions: 1. drm_connector_attach_privacy_screen_provider(), this function creates and attaches the standard privacy-screen properties and registers a generic notifier for generating sysfs-connector-status-events on external changes to the privacy-screen status. 2. drm_connector_update_privacy_screen(), update the privacy-screen's sw_state if the connector has a privacy-screen. Changes in v2: - Do not update connector->state->privacy_screen_sw_state on atomic-commits. - Change drm_connector_update_privacy_screen() to take drm_connector_state as argument instead of a full drm_atomic_state. This allows the helper to be called by drivers when they are enabling crtcs/encoders/connectors. Reviewed-by: Emil Velikov Reviewed-by: Lyude Paul Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20211005202322.700909-6-hdegoede@redhat.com --- include/drm/drm_connector.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 1acbcf0626ce..b83ba0285f29 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,7 @@ struct drm_encoder; struct drm_property; struct drm_property_blob; struct drm_printer; +struct drm_privacy_screen; struct edid; struct i2c_adapter; @@ -1439,6 +1441,12 @@ struct drm_connector { */ struct drm_property *max_bpc_property; + /** @privacy_screen: drm_privacy_screen for this connector, or NULL. */ + struct drm_privacy_screen *privacy_screen; + + /** @privacy_screen_notifier: privacy-screen notifier_block */ + struct notifier_block privacy_screen_notifier; + /** * @privacy_screen_sw_state_property: Optional atomic property for the * connector to control the integrated privacy screen. @@ -1776,6 +1784,9 @@ int drm_connector_attach_max_bpc_property(struct drm_connector *connector, int min, int max); void drm_connector_create_privacy_screen_properties(struct drm_connector *conn); void drm_connector_attach_privacy_screen_properties(struct drm_connector *conn); +void drm_connector_attach_privacy_screen_provider( + struct drm_connector *connector, struct drm_privacy_screen *priv); +void drm_connector_update_privacy_screen(const struct drm_connector_state *connector_state); /** * struct drm_tile_group - Tile group metadata -- cgit v1.2.3 From 7c4dd0a266527ffa7ed8d424facaba171618820a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 13 Oct 2021 00:42:52 +0200 Subject: drm: of: Add drm_of_lvds_get_data_mapping Add helper function to convert DT "data-mapping" property string value into media bus format value, and deduplicate the code in panel-lvds.c and lvds-codec.c . Signed-off-by: Marek Vasut Cc: Laurent Pinchart Cc: Sam Ravnborg To: dri-devel@lists.freedesktop.org Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211012224252.29185-1-marex@denx.de --- include/drm/drm_of.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/drm/drm_of.h b/include/drm/drm_of.h index b9b093add92e..99f79ac8b4cd 100644 --- a/include/drm/drm_of.h +++ b/include/drm/drm_of.h @@ -49,6 +49,7 @@ int drm_of_find_panel_or_bridge(const struct device_node *np, struct drm_bridge **bridge); int drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1, const struct device_node *port2); +int drm_of_lvds_get_data_mapping(const struct device_node *port); #else static inline uint32_t drm_of_crtc_port_mask(struct drm_device *dev, struct device_node *port) @@ -98,6 +99,12 @@ drm_of_lvds_get_dual_link_pixel_order(const struct device_node *port1, { return -EINVAL; } + +static inline int +drm_of_lvds_get_data_mapping(const struct device_node *port) +{ + return -EINVAL; +} #endif /* -- cgit v1.2.3 From ba3078dad1401131293664733c15f6f066079de9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 14 Oct 2021 18:00:57 +0300 Subject: drm/dp: add helpers to read link training delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The link training delays are different and/or available in different DPCD offsets depending on: - Clock recovery vs. channel equalization - DPRX vs. LTTPR - 128b/132b vs. 8b/10b - DPCD 1.4+ vs. earlier Add helpers to get the correct delays in us, reading DPCD if necessary. This is more straightforward than trying to retrofit the existing helpers to take 128b/132b into account. Having to pass in the DPCD receiver cap field seems unavoidable, because reading it involves checking the revision and reading extended receiver cap. So unfortunately the interface is mixed cached and read as needed. v2: Remove delay_us < 0 check and the whole local var (Ville) Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20211014150059.28957-1-jani.nikula@intel.com --- include/drm/drm_dp_helper.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index a1df35aa6e68..b653c5da7065 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -1114,8 +1114,15 @@ struct drm_panel; # define DP_UHBR20 (1 << 1) # define DP_UHBR13_5 (1 << 2) -#define DP_128B132B_TRAINING_AUX_RD_INTERVAL 0x2216 /* 2.0 */ -# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK 0x7f +#define DP_128B132B_TRAINING_AUX_RD_INTERVAL 0x2216 /* 2.0 */ +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK 0x7f +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_400_US 0x00 +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_4_MS 0x01 +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_8_MS 0x02 +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_12_MS 0x03 +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_16_MS 0x04 +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_32_MS 0x05 +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_64_MS 0x06 #define DP_TEST_264BIT_CUSTOM_PATTERN_7_0 0x2230 #define DP_TEST_264BIT_CUSTOM_PATTERN_263_256 0x2250 @@ -1385,6 +1392,11 @@ enum drm_dp_phy { # define DP_VOLTAGE_SWING_LEVEL_3_SUPPORTED BIT(0) # define DP_PRE_EMPHASIS_LEVEL_3_SUPPORTED BIT(1) +#define DP_128B132B_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 0xf0022 /* 2.0 */ +#define DP_128B132B_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER(dp_phy) \ + DP_LTTPR_REG(dp_phy, DP_128B132B_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1) +/* see DP_128B132B_TRAINING_AUX_RD_INTERVAL for values */ + #define DP_LANE0_1_STATUS_PHY_REPEATER1 0xf0030 /* 1.3 */ #define DP_LANE0_1_STATUS_PHY_REPEATER(dp_phy) \ DP_LTTPR_REG(dp_phy, DP_LANE0_1_STATUS_PHY_REPEATER1) @@ -1521,6 +1533,11 @@ u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZ #define DP_LTTPR_COMMON_CAP_SIZE 8 #define DP_LTTPR_PHY_CAP_SIZE 3 +int drm_dp_read_clock_recovery_delay(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE], + enum drm_dp_phy dp_phy, bool uhbr); +int drm_dp_read_channel_eq_delay(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE], + enum drm_dp_phy dp_phy, bool uhbr); + void drm_dp_link_train_clock_recovery_delay(const struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE]); void drm_dp_lttpr_link_train_clock_recovery_delay(void); -- cgit v1.2.3 From dd66f56caea6bb1a3703fb3bfc3106444d05a930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 21 Oct 2021 08:55:24 +0200 Subject: dma-buf: fix kerneldoc for renamed members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those members where renamed, update the kerneldoc as well. Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20211021141945.84023-1-christian.koenig@amd.com --- include/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 02c2eb874da6..9807aef33685 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -433,8 +433,8 @@ struct dma_buf { /** @poll: for userspace poll support */ wait_queue_head_t poll; - /** @cb_excl: for userspace poll support */ - /** @cb_shared: for userspace poll support */ + /** @cb_in: for userspace poll support */ + /** @cb_out: for userspace poll support */ struct dma_buf_poll_cb_t { struct dma_fence_cb cb; wait_queue_head_t *poll; -- cgit v1.2.3 From 1705643faecde95bdeb11bea5ab5baed084e9f91 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 19 Oct 2021 05:30:20 +0800 Subject: mmc: add MT7921 SDIO identifiers for MediaTek Bluetooth devices The MT7961 SDIO identifier for MediaTek Bluetooth devices were being referred in the MediaTek Bluetooth driver. Co-developed-by: Mark-yw Chen Signed-off-by: Mark-yw Chen Signed-off-by: Sean Wang Signed-off-by: Marcel Holtmann --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index a85c9f0bd470..53f0efa0bccf 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -105,6 +105,7 @@ #define SDIO_VENDOR_ID_MEDIATEK 0x037a #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 #define SDIO_DEVICE_ID_MEDIATEK_MT7668 0x7668 +#define SDIO_DEVICE_ID_MEDIATEK_MT7961 0x7961 #define SDIO_VENDOR_ID_MICROCHIP_WILC 0x0296 #define SDIO_DEVICE_ID_MICROCHIP_WILC1000 0x5347 -- cgit v1.2.3 From 3ab7b6ac5d829e60c3b89d415811ff1c9f358c8e Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 25 Oct 2021 10:09:23 -0700 Subject: pwm: Introduce single-PWM of_xlate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing pxa driver and the upcoming addition of PWM support in the TI sn565dsi86 DSI/eDP bridge driver both has a single PWM channel and thereby a need for a of_xlate function with the period as its single argument. Introduce a common helper function in the core that can be used as of_xlate by such drivers and migrate the pxa driver to use this. Signed-off-by: Bjorn Andersson Acked-by: Uwe Kleine-König Tested-by: Steev Klimaszewski Tested-By: Steev Klimaszewski Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20211025170925.3096444-1-bjorn.andersson@linaro.org --- include/linux/pwm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 725c9b784e60..dd51d4931fdc 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -414,6 +414,8 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args); +struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc, + const struct of_phandle_args *args); struct pwm_device *pwm_get(struct device *dev, const char *con_id); struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np, -- cgit v1.2.3 From 6a98e3836fa2077b169f10a35c2ca9952d53f987 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 27 Oct 2021 16:58:38 -0700 Subject: Bluetooth: Add helper for serialized HCI command execution The usage of __hci_cmd_sync() within the hdev->setup() callback allows for a nice and simple serialized execution of HCI commands. More importantly it allows for result processing before issueing the next command. With the current usage of hci_req_run() it is possible to batch up commands and execute them, but it is impossible to react to their results or errors. This is an attempt to generalize the hdev->setup() handling and provide a simple way of running multiple HCI commands from a single function context. There are multiple struct work that are decdicated to certain tasks already used right now. It is add a lot of bloat to hci_dev struct and extra handling code. So it might be possible to put all of these behind a common HCI command infrastructure and just execute the HCI commands from the same work context in a serialized fashion. For example updating the white list and resolving list can be done now without having to know the list size ahead of time. Also preparing for suspend or resume shouldn't require a state machine anymore. There are other tasks that should be simplified as well. Signed-off-by: Marcel Holtmann Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 11 ++++------- include/net/bluetooth/hci_sync.h | 42 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 include/net/bluetooth/hci_sync.h (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index dd8840e70e25..17ddea51d161 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -30,6 +30,7 @@ #include #include +#include #include /* HCI priority */ @@ -475,6 +476,9 @@ struct hci_dev { struct work_struct power_on; struct delayed_work power_off; struct work_struct error_reset; + struct work_struct cmd_sync_work; + struct list_head cmd_sync_work_list; + struct mutex cmd_sync_work_lock; __u16 discov_timeout; struct delayed_work discov_off; @@ -1690,10 +1694,6 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); -struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 timeout); -struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u8 event, u32 timeout); int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); @@ -1704,9 +1704,6 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); -struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 timeout); - u32 hci_conn_get_phy(struct hci_conn *conn); /* ----- HCI Sockets ----- */ diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h new file mode 100644 index 000000000000..fcfdeb3cbd7c --- /dev/null +++ b/include/net/bluetooth/hci_sync.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BlueZ - Bluetooth protocol stack for Linux + * + * Copyright (C) 2021 Intel Corporation + */ + +typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data); +typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data, + int err); + +struct hci_cmd_sync_work_entry { + struct list_head list; + hci_cmd_sync_work_func_t func; + void *data; + hci_cmd_sync_work_destroy_t destroy; +}; + +/* Function with sync suffix shall not be called with hdev->lock held as they + * wait the command to complete and in the meantime an event could be received + * which could attempt to acquire hdev->lock causing a deadlock. + */ +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout); +struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout, + struct sock *sk); +int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout, + struct sock *sk); + +void hci_cmd_sync_init(struct hci_dev *hdev); +void hci_cmd_sync_clear(struct hci_dev *hdev); + +int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); -- cgit v1.2.3 From 161510ccf91c961638940b03abb1ee804be53a97 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 27 Oct 2021 16:58:39 -0700 Subject: Bluetooth: hci_sync: Make use of hci_cmd_sync_queue set 1 This make use of hci_cmd_sync_queue for the following MGMT commands: Set Device Class Set Device ID Add UUID Remove UUID tools/mgmt-tester -s "Set Device Class" Test Summary ------------ Set Device Class - Success 1 Passed Set Device Class - Success 2 Passed Set Device Class - Invalid parameters 1 Passed Total: 3, Passed: 3 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 0.0599 seconds tools/mgmt-tester -s "Set Device ID" Test Summary ------------ Set Device ID - Success 1 Passed Set Device ID - Success 2 Passed Set Device ID - Disable Passed Set Device ID - Power off and Power on Passed Set Device ID - SSP off and Power on Passed Set Device ID - Invalid Parameter Passed Total: 6, Passed: 6 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 0.107 seconds tools/mgmt-tester -s "Add UUID" Test Summary ------------ Add UUID - UUID-16 1 Passed Add UUID - UUID-16 multiple 1 Passed Add UUID - UUID-16 partial 1 Passed Add UUID - UUID-32 1 Passed Add UUID - UUID-32 multiple 1 Passed Add UUID - UUID-32 partial 1 Passed Add UUID - UUID-128 1 Passed Add UUID - UUID-128 multiple 1 Passed Add UUID - UUID-128 partial 1 Passed Add UUID - UUID mix Passed Total: 10, Passed: 10 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 0.198 seconds tools/mgmt-tester -s "Remove UUID" Test Summary ------------ Remove UUID - Success 1 Passed Remove UUID - All UUID - Success 2 Passed Remove UUID - Power Off - Success 3 Passed Remove UUID - Power Off and On - Success 4 Passed Remove UUID - Not Exist - Invalid Params 1 Passed Total: 5, Passed: 5 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 0.0908 seconds Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index fcfdeb3cbd7c..fe77ff97bc8e 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -40,3 +40,6 @@ void hci_cmd_sync_clear(struct hci_dev *hdev); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); + +int hci_update_eir_sync(struct hci_dev *hdev); +int hci_update_class_sync(struct hci_dev *hdev); -- cgit v1.2.3 From cba6b758711cab946c787f7c15be92cc749b8e1f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 27 Oct 2021 16:58:40 -0700 Subject: Bluetooth: hci_sync: Make use of hci_cmd_sync_queue set 2 This make use of hci_cmd_sync_queue for the following MGMT commands: Add Advertising Remove Advertising Add Extended Advertising Parameters Add Extended Advertising Data mgmt-tester -s "Add Advertising" Test Summary ------------ Add Advertising - Failure: LE off Passed Add Advertising - Invalid Params 1 (AD too long) Passed Add Advertising - Invalid Params 2 (Malformed len) Passed Add Advertising - Invalid Params 3 (Malformed len) Passed Add Advertising - Invalid Params 4 (Malformed len) Passed Add Advertising - Invalid Params 5 (AD too long) Passed Add Advertising - Invalid Params 6 (ScRsp too long) Passed Add Advertising - Invalid Params 7 (Malformed len) Passed Add Advertising - Invalid Params 8 (Malformed len) Passed Add Advertising - Invalid Params 9 (Malformed len) Passed Add Advertising - Invalid Params 10 (ScRsp too long) Passed Add Advertising - Rejected (Timeout, !Powered) Passed Add Advertising - Success 1 (Powered, Add Adv Inst) Passed Add Advertising - Success 2 (!Powered, Add Adv Inst) Passed Add Advertising - Success 3 (!Powered, Adv Enable) Passed Add Advertising - Success 4 (Set Adv on override) Passed Add Advertising - Success 5 (Set Adv off override) Passed Add Advertising - Success 6 (Scan Rsp Dta, Adv ok) Passed Add Advertising - Success 7 (Scan Rsp Dta, Scan ok) Passed Add Advertising - Success 8 (Connectable Flag) Passed Add Advertising - Success 9 (General Discov Flag) Passed Add Advertising - Success 10 (Limited Discov Flag) Passed Add Advertising - Success 11 (Managed Flags) Passed Add Advertising - Success 12 (TX Power Flag) Passed Add Advertising - Success 13 (ADV_SCAN_IND) Passed Add Advertising - Success 14 (ADV_NONCONN_IND) Passed Add Advertising - Success 15 (ADV_IND) Passed Add Advertising - Success 16 (Connectable -> on) Passed Add Advertising - Success 17 (Connectable -> off) Passed Add Advertising - Success 18 (Power -> off, Remove) Passed Add Advertising - Success 19 (Power -> off, Keep) Passed Add Advertising - Success 20 (Add Adv override) Passed Add Advertising - Success 21 (Timeout expires) Passed Add Advertising - Success 22 (LE -> off, Remove) Passed Add Advertising - Success (Empty ScRsp) Passed Add Advertising - Success (ScRsp only) Passed Add Advertising - Invalid Params (ScRsp too long) Passed Add Advertising - Success (ScRsp appear) Passed Add Advertising - Invalid Params (ScRsp appear long) Passed Add Advertising - Success (Appear is null) Passed Add Advertising - Success (Name is null) Passed Add Advertising - Success (Complete name) Passed Add Advertising - Success (Shortened name) Passed Add Advertising - Success (Short name) Passed Add Advertising - Success (Name + data) Passed Add Advertising - Invalid Params (Name + data) Passed Add Advertising - Success (Name+data+appear) Passed Total: 47, Passed: 47 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 2.17 seconds mgmt-tester -s "Remove Advertising" Test Summary ------------ Remove Advertising - Invalid Params 1 Passed Remove Advertising - Success 1 Passed Remove Advertising - Success 2 Passed Total: 3, Passed: 3 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 0.0585 seconds mgmt-tester -s "Ext Adv MGMT Params" Test Summary: ------------ Ext Adv MGMT Params - Unpowered Passed Ext Adv MGMT Params - Invalid parameters Passed Ext Adv MGMT Params - Success Passed Ext Adv MGMT Params - (5.0) Success Passed Total: 4, Passed: 4 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 0.0746 seconds mgmt-tester -s "Ext Adv MGMT -" Test Summary ------------ Ext Adv MGMT - Data set without Params Passed Ext Adv MGMT - AD Data (5.0) Invalid parameters Passed Ext Adv MGMT - AD Data (5.0) Success Passed Ext Adv MGMT - AD Scan Response (5.0) Success Passed Total: 4, Passed: 4 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 0.0805 seconds Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 2 ++ include/net/bluetooth/hci_sync.h | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 3271870fd85e..2f31e571f34c 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -380,6 +380,7 @@ typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, #define HCI_REQ_SKB BIT(1) struct hci_ctrl { + struct sock *sk; u16 opcode; u8 req_flags; u8 req_event; @@ -405,6 +406,7 @@ struct bt_skb_cb { #define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type #define hci_skb_expect(skb) bt_cb((skb))->expect #define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode +#define hci_skb_sk(skb) bt_cb((skb))->hci.sk static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index fe77ff97bc8e..143829cd1b29 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -43,3 +43,23 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, int hci_update_eir_sync(struct hci_dev *hdev); int hci_update_class_sync(struct hci_dev *hdev); + +int hci_update_eir_sync(struct hci_dev *hdev); +int hci_update_class_sync(struct hci_dev *hdev); + +int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, + bool rpa, u8 *own_addr_type); + +int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance); +int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance); +int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, + bool force); + +int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance); +int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance); +int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); +int hci_enable_advertising_sync(struct hci_dev *hdev); + +int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force); +int hci_disable_advertising_sync(struct hci_dev *hdev); -- cgit v1.2.3 From e8907f76544ffe225ab95d70f7313267b1d0c76d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 27 Oct 2021 16:58:41 -0700 Subject: Bluetooth: hci_sync: Make use of hci_cmd_sync_queue set 3 This make use of hci_cmd_sync_queue for the following MGMT commands: Add Device Remove Device Tested with: mgmt-tester -s "Add Device" Test Summary ------------ Add Device - Invalid Params 1 Passed Add Device - Invalid Params 2 Passed Add Device - Invalid Params 3 Passed Add Device - Invalid Params 4 Passed Add Device - Success 1 Passed Add Device - Success 2 Passed Add Device - Success 3 Passed Add Device - Success 4 Passed Add Device - Success 5 Passed Add Device - Success 6 - Add to whitelist Passed Add Device - Success 7 - Add to resolv list Passed Add Device - Success 8 - Enable resolv list Passed Total: 12, Passed: 12 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 0.209 seconds mgmt-tester -s "Remove Device" Test Summary ------------ Remove Device - Invalid Params 1 Passed Remove Device - Invalid Params 2 Passed Remove Device - Invalid Params 3 Passed Remove Device - Success 1 Passed Remove Device - Success 2 Passed Remove Device - Success 3 Passed Remove Device - Success 4 Passed Remove Device - Success 5 Passed Remove Device - Success 6 - All Devices Passed Remove Device - Success 7 - Remove from whitelist Passed Remove Device - Success 8 - Remove from resolv list Passed Total: 11, Passed: 11 (100.0%), Failed: 0, Not Run: 0 Overall execution time: 4.26 seconds Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 143829cd1b29..b0708f900d89 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -63,3 +63,5 @@ int hci_enable_advertising_sync(struct hci_dev *hdev); int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force); int hci_disable_advertising_sync(struct hci_dev *hdev); + +int hci_update_passive_scan_sync(struct hci_dev *hdev); -- cgit v1.2.3 From ad383c2c65a5baf16e334cd40a013cc302176891 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 27 Oct 2021 16:58:42 -0700 Subject: Bluetooth: hci_sync: Enable advertising when LL privacy is enabled This enables advertising when LL privacy is enabled and changes the command sequence when resolving list is updated to also account for when advertising is enabled using the following sequence: If there are devices to scan: Disable Scanning -> Update Accept List -> use_ll_privacy((Disable Advertising) -> Disable Resolving List -> Update Resolving List -> Enable Resolving List -> (Enable Advertising)) -> Enable Scanning Otherwise: Disable Scanning Errors during the Update Accept List stage are handled gracefully by restoring any previous state (e.g. advertising) and disabling the use of accept list as either accept list or resolving list could not be updated. Tested with: mgmt-tester -s "LL Privacy" Test Summary ------------ LL Privacy - Add Device 1 (Add to WL) Passed LL Privacy - Add Device 2 (Add to RL) Passed LL Privacy - Add Device 3 (Enable RL) Passed LL Privacy - Add Device 4 (2 Devices to WL) Passed LL Privacy - Add Device 5 (2 Devices to RL) Passed LL Privacy - Add Device 6 (RL is full) Passed LL Privacy - Add Device 7 (WL is full) Passed LL Privacy - Add Device 8 (Disable Adv) Passed LL Privacy - Add Device 9 (Multi Adv) Passed LL Privacy - Add Device 10 (Multi Dev and Multi Adv) Passed LL Privacy - Remove Device 1 (Remove from WL) Passed LL Privacy - Remove Device 2 (Remove from RL) Passed LL Privacy - Remove Device 3 (Disable RL) Passed LL Privacy - Remove Device 4 (Disable Adv) Passed LL Privacy - Remove Device 5 (Multi Adv) Passed LL Privacy - Start Discovery 1 (Disable RL) Passed LL Privacy - Start Discovery 2 (Disable RL) Passed Total: 18, Passed: 18 (100.0%), Failed: 0, Not Run: 0 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 5 ++++- include/net/bluetooth/hci_sync.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 17ddea51d161..a6b075203cbe 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1465,8 +1465,11 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) +#define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) + /* Use LL Privacy based address resolution if supported */ -#define use_ll_privacy(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) +#define use_ll_privacy(dev) (ll_privacy_capable(dev) && \ + hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY)) /* Use enhanced synchronous connection if command is supported */ #define enhanced_sco_capable(dev) ((dev)->commands[29] & 0x08) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index b0708f900d89..ec727eb18e90 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -65,3 +65,4 @@ int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, int hci_disable_advertising_sync(struct hci_dev *hdev); int hci_update_passive_scan_sync(struct hci_dev *hdev); +int hci_update_passive_scan(struct hci_dev *hdev); -- cgit v1.2.3 From cf75ad8b41d2aa06f98f365d42a3ae8b059daddd Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 27 Oct 2021 16:58:44 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_SET_POWERED This make use of hci_cmd_sync_queue when MGMT_SET_POWERED is used so all commands are run within hdev->cmd_sync_work instead of hdev->power_on_work and hdev->power_off_work. In addition to that the power on sequence now takes into account if local IRK needs to be programmed in the resolving list. Tested with: tools/mgmt-tester -s "Set powered" Test Summary ------------ Set powered on - Success Passed Set powered on - Invalid parameters 1 Passed Set powered on - Invalid parameters 2 Passed Set powered on - Invalid parameters 3 Passed Set powered on - Invalid index Passed Set powered on - Privacy and Advertising Passed Set powered off - Success Passed Set powered off - Class of Device Passed Set powered off - Invalid parameters 1 Passed Set powered off - Invalid parameters 2 Passed Set powered off - Invalid parameters 3 Passed Total: 11, Passed: 11 (100.0%), Failed: 0, Not Run: 0 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index ec727eb18e90..00203077e656 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -66,3 +66,9 @@ int hci_disable_advertising_sync(struct hci_dev *hdev); int hci_update_passive_scan_sync(struct hci_dev *hdev); int hci_update_passive_scan(struct hci_dev *hdev); + +int hci_dev_open_sync(struct hci_dev *hdev); +int hci_dev_close_sync(struct hci_dev *hdev); + +int hci_powered_update_sync(struct hci_dev *hdev); +int hci_set_powered_sync(struct hci_dev *hdev, u8 val); -- cgit v1.2.3 From abfeea476c68afea54c9c050a2d3b19d5d2ee873 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 27 Oct 2021 16:58:45 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_OP_START_DISCOVERY This make use of hci_cmd_sync_queue for MGMT_OP_START_DISCOVERY, MGMT_OP_START_SERVICE_DISCOVERY and MGMT_OP_STOP_DISCOVERY to use hci_cmd_sync_queue so they no longer depend on hdev->discov_update work to send any commands. Tested with: tools/mgmt-tester -s "Start Discovery" Test Summary ------------ Start Discovery - Not powered 1 Passed Start Discovery - Invalid parameters 1 Passed Start Discovery - Not supported 1 Passed Start Discovery - Success 1 Passed Start Discovery - Success 2 Passed Start Discovery - Power Off 1 Passed Start Discovery BREDR LE - (Ext Scan Enable) Passed Start Discovery LE - (Ext Scan Enable) Passed Start Discovery LE - (Ext Scan Param) Passed Start Discovery - (2m, Scan Param) Passed Start Discovery - (coded, Scan Param) Passed Start Discovery - (1m, 2m, coded, Scan Param) Passed LL Privacy - Start Discovery 1 (Disable RL) Passed LL Privacy - Start Discovery 2 (Disable RL) Passed Total: 14, Passed: 14 (100.0%), Failed: 0, Not Run: 0 tools/mgmt-tester -s "Start Service" Test Summary ------------ Start Service Discovery - Not powered 1 Passed Start Service Discovery - Invalid parameters 1 Passed Start Service Discovery - Not supported 1 Passed Start Service Discovery - Success 1 Passed Start Service Discovery - Success 2 Passed Total: 5, Passed: 5 (100.0%), Failed: 0, Not Run: 0 tools/mgmt-tester -s "Stop Discovery" Test Summary ------------ Stop Discovery - Success 1 Passed Stop Discovery - BR/EDR (Inquiry) Success 1 Passed Stop Discovery - Rejected 1 Passed Stop Discovery - Invalid parameters 1 Passed Stop Discovery - (Ext Scan Disable) Passed Total: 5, Passed: 5 (100.0%), Failed: 0, Not Run: 0 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 00203077e656..c4fa77321b31 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -59,6 +59,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance); int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance); int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); int hci_enable_advertising_sync(struct hci_dev *hdev); +int hci_enable_advertising(struct hci_dev *hdev); int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force); @@ -72,3 +73,6 @@ int hci_dev_close_sync(struct hci_dev *hdev); int hci_powered_update_sync(struct hci_dev *hdev); int hci_set_powered_sync(struct hci_dev *hdev, u8 val); + +int hci_start_discovery_sync(struct hci_dev *hdev); +int hci_stop_discovery_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 353a0249c3f60365c55d53e2d068de4f43669a22 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Wed, 27 Oct 2021 16:58:46 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_FAST_CONNECTABLE This creates a synchronized Write Fast Connectable call and attaches it to the MGMT_OP_SET_FAST_CONNECTABLE management opcode. Signed-off-by: Brian Gix Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index c4fa77321b31..1fb66b6f8a34 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -68,6 +68,8 @@ int hci_disable_advertising_sync(struct hci_dev *hdev); int hci_update_passive_scan_sync(struct hci_dev *hdev); int hci_update_passive_scan(struct hci_dev *hdev); +int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); + int hci_dev_open_sync(struct hci_dev *hdev); int hci_dev_close_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 451d95a98c5a350da2f2c5447cc17115a5b94c8e Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Wed, 27 Oct 2021 16:58:47 -0700 Subject: Bluetooth: hci_sync: Enable synch'd set_bredr Uses previously written: hci_write_fast_connectable_sync hci_update_scan_sync hci_update_adv_data_sync Signed-off-by: Brian Gix Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 1fb66b6f8a34..03ffe95415fc 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -69,6 +69,7 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev); int hci_update_passive_scan(struct hci_dev *hdev); int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); +int hci_update_scan_sync(struct hci_dev *hdev); int hci_dev_open_sync(struct hci_dev *hdev); int hci_dev_close_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 47db6b42991e6d5645d0938e43085aaf88cdfba4 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Wed, 27 Oct 2021 16:58:48 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_OP_GET_CONN_INFO Synchronous version of MGMT_OP_GET_CONN_INFO Implements: hci_read_rssi_sync hci_read_tx_power_sync Signed-off-by: Brian Gix Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 03ffe95415fc..156cb5faf64f 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -67,6 +67,8 @@ int hci_disable_advertising_sync(struct hci_dev *hdev); int hci_update_passive_scan_sync(struct hci_dev *hdev); int hci_update_passive_scan(struct hci_dev *hdev); +int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle); +int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type); int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); int hci_update_scan_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 2f2eb0c9de2eb69969aaf04feffb69310d3804b2 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Wed, 27 Oct 2021 16:58:49 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_SECURE_CONN Synchronous version of MGMT_OP_SET_SECURE_CONN. Signed-off-by: Brian Gix Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 156cb5faf64f..4b27a89cc57e 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -69,6 +69,7 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev); int hci_update_passive_scan(struct hci_dev *hdev); int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle); int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type); +int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val); int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); int hci_update_scan_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 5a750137466400276560458db040c143cbc254ed Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Wed, 27 Oct 2021 16:58:50 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_OP_GET_CLOCK_INFO Synchronous version of MGMT_OP_GET_CLOCK_INFO. Signed-off-by: Brian Gix Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 4b27a89cc57e..cf54f8f14edb 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -70,6 +70,7 @@ int hci_update_passive_scan(struct hci_dev *hdev); int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle); int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type); int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val); +int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp); int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); int hci_update_scan_sync(struct hci_dev *hdev); -- cgit v1.2.3 From d81a494c43df66f053f7d1ec612e057eb99f34d4 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Wed, 27 Oct 2021 16:58:51 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_LE Uses: hci_disable_advertising_sync hci_remove_ext_adv_instance_sync hci_write_le_host_supported_sync hci_setup_ext_adv_instance_sync hci_update_scan_rsp_data_sync Signed-off-by: Brian Gix Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index cf54f8f14edb..d969693c33b5 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -75,6 +75,10 @@ int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp); int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); int hci_update_scan_sync(struct hci_dev *hdev); +int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul); +int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, + struct sock *sk); + int hci_dev_open_sync(struct hci_dev *hdev); int hci_dev_close_sync(struct hci_dev *hdev); -- cgit v1.2.3 From f892244b05bf6a99e48db14f8bbd96db16bcfa69 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Wed, 27 Oct 2021 16:58:52 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_OP_READ_LOCAL_OOB_DATA New functions: hci_read_local_oob_data_sync This function requires all of the data from the cmd cmplt event to be passed up to the caller via the skb. mgmt-tester paths: Read Local OOB Data - Not powered Read Local OOB Data - Legacy pairing Read Local OOB Data - Success SSP Read Local OOB Data - Success SC Signed-off-by: Brian Gix Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index d969693c33b5..a381621a56a1 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -78,6 +78,8 @@ int hci_update_scan_sync(struct hci_dev *hdev); int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul); int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, struct sock *sk); +struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext, + struct sock *sk); int hci_dev_open_sync(struct hci_dev *hdev); int hci_dev_close_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 6f6ff38a1e140d7ca3a733556b0027eec1fe4ef1 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Wed, 27 Oct 2021 16:58:54 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_LOCAL_NAME Uses existing *_sync functions, but made hci_update_name_sync non-static. mgmt-test paths: Set Advertising on - Local name 1 Set Advertising on - Name + Appear 1 Set Local Name - Success 1 Set Local Name - Success 2 Set Local Name - Success 3 Add Advertising - Success (Empty ScRsp) Add Advertising - Success (Complete name) Add Advertising - Success (Shortened name) Add Advertising - Success (Short name) Add Advertising - Success (Name + data) Add Advertising - Invalid Params (Name + data) Add Advertising - Success (Name+data+appear) Read Ext Controller Info 3 Read Ext Controller Info 4 Read Ext Controller Info 5 Add Ext Advertising - Success (Empty ScRsp) Add Ext Advertising - Success (Complete name) Add Ext Advertising - Success (Shortened name) Add Ext Advertising - Success (Short name) Add Ext Advertising - Success (Name + data) Add Ext Advertising - Invalid Params (Name + data) Add Ext Advertising - Success (Name+data+appear) Signed-off-by: Brian Gix Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index a381621a56a1..d9f2e3182ed8 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -46,6 +46,7 @@ int hci_update_class_sync(struct hci_dev *hdev); int hci_update_eir_sync(struct hci_dev *hdev); int hci_update_class_sync(struct hci_dev *hdev); +int hci_update_name_sync(struct hci_dev *hdev); int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, bool rpa, u8 *own_addr_type); -- cgit v1.2.3 From 3244845c6307fa6f4fa2eabe5259d2c93c837dce Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Wed, 27 Oct 2021 16:58:58 -0700 Subject: Bluetooth: hci_sync: Convert MGMT_OP_SSP mgmt-tester paths: Set SSP on - Success 2 Set Device ID - SSP off and Power on Signed-off-by: Brian Gix Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 - include/net/bluetooth/hci_sync.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a6b075203cbe..3e53c845ab0e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1806,7 +1806,6 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 entered); void mgmt_auth_failed(struct hci_conn *conn, u8 status); void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status); -void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index d9f2e3182ed8..db96546d40c8 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -47,6 +47,7 @@ int hci_update_class_sync(struct hci_dev *hdev); int hci_update_eir_sync(struct hci_dev *hdev); int hci_update_class_sync(struct hci_dev *hdev); int hci_update_name_sync(struct hci_dev *hdev); +int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, bool rpa, u8 *own_addr_type); -- cgit v1.2.3 From d0b137062b2de75b264b84143d21c98abc5f5ad2 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 27 Oct 2021 16:58:59 -0700 Subject: Bluetooth: hci_sync: Rework init stages This moves the init stages to use the hci_sync infra and in addition to that have the stages as function tables so it is easier to change the command sequence. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index db96546d40c8..487e4981cce1 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -83,6 +83,7 @@ int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext, struct sock *sk); +int hci_reset_sync(struct hci_dev *hdev); int hci_dev_open_sync(struct hci_dev *hdev); int hci_dev_close_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 182ee45da083db4e3e621541ccf255bfa9652214 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 27 Oct 2021 16:59:00 -0700 Subject: Bluetooth: hci_sync: Rework hci_suspend_notifier This makes hci_suspend_notifier use the hci_*_sync which can be executed synchronously which is allowed in the suspend_notifier and simplifies a lot of the handling since the status of each command can be checked inline so no other work need to be scheduled thus can be performed without using of a state machine. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ---- include/net/bluetooth/hci_sync.h | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3e53c845ab0e..53a8c7d3a4bf 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -523,7 +523,6 @@ struct hci_dev { bool advertising_paused; struct notifier_block suspend_notifier; - struct work_struct suspend_prepare; enum suspended_state suspend_state_next; enum suspended_state suspend_state; bool scanning_paused; @@ -532,9 +531,6 @@ struct hci_dev { bdaddr_t wake_addr; u8 wake_addr_type; - wait_queue_head_t suspend_wait_q; - DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); - struct hci_conn_hash conn_hash; struct list_head mgmt_pending; diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 487e4981cce1..00b13e8ca800 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -92,3 +92,6 @@ int hci_set_powered_sync(struct hci_dev *hdev, u8 val); int hci_start_discovery_sync(struct hci_dev *hdev); int hci_stop_discovery_sync(struct hci_dev *hdev); + +int hci_suspend_sync(struct hci_dev *hdev); +int hci_resume_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 542cff7893a37445f98ece26aeb3c9c1055e9ea4 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 28 Oct 2021 12:24:03 -0400 Subject: drm/sched: Avoid lockdep spalt on killing a processes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Probelm: Singlaning one sched fence from within another's sched fence singal callback generates lockdep splat because the both have same lockdep class of their fence->lock Fix: Fix bellow stack by rescheduling to irq work of signaling and killing of jobs that left when entity is killed. [11176.741181] dump_stack+0x10/0x12 [11176.741186] __lock_acquire.cold+0x208/0x2df [11176.741197] lock_acquire+0xc6/0x2d0 [11176.741204] ? dma_fence_signal+0x28/0x80 [11176.741212] _raw_spin_lock_irqsave+0x4d/0x70 [11176.741219] ? dma_fence_signal+0x28/0x80 [11176.741225] dma_fence_signal+0x28/0x80 [11176.741230] drm_sched_fence_finished+0x12/0x20 [gpu_sched] [11176.741240] drm_sched_entity_kill_jobs_cb+0x1c/0x50 [gpu_sched] [11176.741248] dma_fence_signal_timestamp_locked+0xac/0x1a0 [11176.741254] dma_fence_signal+0x3b/0x80 [11176.741260] drm_sched_fence_finished+0x12/0x20 [gpu_sched] [11176.741268] drm_sched_job_done.isra.0+0x7f/0x1a0 [gpu_sched] [11176.741277] drm_sched_job_done_cb+0x12/0x20 [gpu_sched] [11176.741284] dma_fence_signal_timestamp_locked+0xac/0x1a0 [11176.741290] dma_fence_signal+0x3b/0x80 [11176.741296] amdgpu_fence_process+0xd1/0x140 [amdgpu] [11176.741504] sdma_v4_0_process_trap_irq+0x8c/0xb0 [amdgpu] [11176.741731] amdgpu_irq_dispatch+0xce/0x250 [amdgpu] [11176.741954] amdgpu_ih_process+0x81/0x100 [amdgpu] [11176.742174] amdgpu_irq_handler+0x26/0xa0 [amdgpu] [11176.742393] __handle_irq_event_percpu+0x4f/0x2c0 [11176.742402] handle_irq_event_percpu+0x33/0x80 [11176.742408] handle_irq_event+0x39/0x60 [11176.742414] handle_edge_irq+0x93/0x1d0 [11176.742419] __common_interrupt+0x50/0xe0 [11176.742426] common_interrupt+0x80/0x90 Signed-off-by: Andrey Grodzovsky Suggested-by: Daniel Vetter Suggested-by: Christian König Tested-by: Christian König Reviewed-by: Christian König Link: https://www.spinics.net/lists/dri-devel/msg321250.html --- include/drm/gpu_scheduler.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index f011e4c407f2..bbc22fad8d80 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -28,6 +28,7 @@ #include #include #include +#include #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) @@ -286,7 +287,16 @@ struct drm_sched_job { struct list_head list; struct drm_gpu_scheduler *sched; struct drm_sched_fence *s_fence; - struct dma_fence_cb finish_cb; + + /* + * work is used only after finish_cb has been used and will not be + * accessed anymore. + */ + union { + struct dma_fence_cb finish_cb; + struct irq_work work; + }; + uint64_t id; atomic_t karma; enum drm_sched_priority s_priority; -- cgit v1.2.3 From 0d6a8c5e96833f644b91528de6a3a4398214fb9c Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 18 Oct 2021 08:47:25 +0000 Subject: drm/sysfs: introduce drm_sysfs_connector_hotplug_event This function sends a hotplug uevent with a CONNECTOR property. Signed-off-by: Simon Ser Reviewed-by: Sam Ravnborg Acked-by: Harry Wentland Acked-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/msgid/20211018084707.32253-2-contact@emersion.fr --- include/drm/drm_sysfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_sysfs.h b/include/drm/drm_sysfs.h index d454ef617b2c..6273cac44e47 100644 --- a/include/drm/drm_sysfs.h +++ b/include/drm/drm_sysfs.h @@ -11,6 +11,7 @@ int drm_class_device_register(struct device *dev); void drm_class_device_unregister(struct device *dev); void drm_sysfs_hotplug_event(struct drm_device *dev); +void drm_sysfs_connector_hotplug_event(struct drm_connector *connector); void drm_sysfs_connector_status_event(struct drm_connector *connector, struct drm_property *property); #endif -- cgit v1.2.3 From 710074bb8ab0efac425a43473b8a3e057d645f82 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 18 Oct 2021 08:47:26 +0000 Subject: drm/probe-helper: add drm_kms_helper_connector_hotplug_event This function is the same as drm_kms_helper_hotplug_event, but takes a connector instead of a device. Signed-off-by: Simon Ser Reviewed-by: Sam Ravnborg Acked-by: Harry Wentland Acked-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/msgid/20211018084707.32253-3-contact@emersion.fr --- include/drm/drm_probe_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h index 04c57564c397..48300aa6ca71 100644 --- a/include/drm/drm_probe_helper.h +++ b/include/drm/drm_probe_helper.h @@ -20,6 +20,7 @@ void drm_kms_helper_poll_fini(struct drm_device *dev); bool drm_helper_hpd_irq_event(struct drm_device *dev); bool drm_connector_helper_hpd_irq_event(struct drm_connector *connector); void drm_kms_helper_hotplug_event(struct drm_device *dev); +void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector); void drm_kms_helper_poll_disable(struct drm_device *dev); void drm_kms_helper_poll_enable(struct drm_device *dev); -- cgit v1.2.3 From 749a6c594203b0e9ac59e3d8da492a8ac6a80510 Mon Sep 17 00:00:00 2001 From: Joseph Hwang Date: Tue, 2 Nov 2021 15:19:28 +0800 Subject: Bluetooth: Add struct of reading AOSP vendor capabilities This patch adds the struct of reading AOSP vendor capabilities. New capabilities are added incrementally. Note that the version_supported octets will be used to determine whether a capability has been defined for the version. Signed-off-by: Joseph Hwang Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 53a8c7d3a4bf..b5f061882c10 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -603,6 +603,7 @@ struct hci_dev { #if IS_ENABLED(CONFIG_BT_AOSPEXT) bool aosp_capable; + bool aosp_quality_report; #endif int (*open)(struct hci_dev *hdev); -- cgit v1.2.3 From f1d3b8f91d965c4fd900ac5dd06240cc9df0c7a7 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Tue, 2 Nov 2021 15:17:15 -0400 Subject: fs: dlm: initial support for tracepoints This patch adds initial support for dlm tracepoints. It will introduce tracepoints to dlm main functionality dlm_lock()/dlm_unlock() and their complete ast() callback or blocking bast() callback. The lock/unlock functionality has a start and end tracepoint, this is because there exists a race in case if would have a tracepoint at the end position only the complete/blocking callbacks could occur before. To work with eBPF tracing and using their lookup hash functionality there could be problems that an entry was not inserted yet. However use the start functionality for hash insert and check again in end functionality if there was an dlm internal error so there is no ast callback. In further it might also that locks with local masters will occur those callbacks immediately so we must have such functionality. I did not make everything accessible yet, although it seems eBPF can be used to access a lot of internal datastructures if it's aware of the struct definitions of the running kernel instance. We still can change it, if you do eBPF experiments e.g. time measurements between lock and callback functionality you can simple use the local lkb_id field as hash value in combination with the lockspace id if you have multiple lockspaces. Otherwise you can simple use trace-cmd for some functionality, e.g. `trace-cmd record -e dlm` and `trace-cmd report` afterwards. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/trace/events/dlm.h | 220 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 include/trace/events/dlm.h (limited to 'include') diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h new file mode 100644 index 000000000000..c97b4c163c3e --- /dev/null +++ b/include/trace/events/dlm.h @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dlm + +#if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DLM_H + +#include +#include +#include + +#include "../../../fs/dlm/dlm_internal.h" + +#define show_lock_flags(flags) __print_flags(flags, "|", \ + { DLM_LKF_NOQUEUE, "NOQUEUE" }, \ + { DLM_LKF_CANCEL, "CANCEL" }, \ + { DLM_LKF_CONVERT, "CONVERT" }, \ + { DLM_LKF_VALBLK, "VALBLK" }, \ + { DLM_LKF_QUECVT, "QUECVT" }, \ + { DLM_LKF_IVVALBLK, "IVVALBLK" }, \ + { DLM_LKF_CONVDEADLK, "CONVDEADLK" }, \ + { DLM_LKF_PERSISTENT, "PERSISTENT" }, \ + { DLM_LKF_NODLCKWT, "NODLCKWT" }, \ + { DLM_LKF_NODLCKBLK, "NODLCKBLK" }, \ + { DLM_LKF_EXPEDITE, "EXPEDITE" }, \ + { DLM_LKF_NOQUEUEBAST, "NOQUEUEBAST" }, \ + { DLM_LKF_HEADQUE, "HEADQUE" }, \ + { DLM_LKF_NOORDER, "NOORDER" }, \ + { DLM_LKF_ORPHAN, "ORPHAN" }, \ + { DLM_LKF_ALTPR, "ALTPR" }, \ + { DLM_LKF_ALTCW, "ALTCW" }, \ + { DLM_LKF_FORCEUNLOCK, "FORCEUNLOCK" }, \ + { DLM_LKF_TIMEOUT, "TIMEOUT" }) + +#define show_lock_mode(mode) __print_symbolic(mode, \ + { DLM_LOCK_IV, "IV"}, \ + { DLM_LOCK_NL, "NL"}, \ + { DLM_LOCK_CR, "CR"}, \ + { DLM_LOCK_CW, "CW"}, \ + { DLM_LOCK_PR, "PR"}, \ + { DLM_LOCK_PW, "PW"}, \ + { DLM_LOCK_EX, "EX"}) + +#define show_dlm_sb_flags(flags) __print_flags(flags, "|", \ + { DLM_SBF_DEMOTED, "DEMOTED" }, \ + { DLM_SBF_VALNOTVALID, "VALNOTVALID" }, \ + { DLM_SBF_ALTMODE, "ALTMODE" }) + +/* note: we begin tracing dlm_lock_start() only if ls and lkb are found */ +TRACE_EVENT(dlm_lock_start, + + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, + __u32 flags), + + TP_ARGS(ls, lkb, mode, flags), + + TP_STRUCT__entry( + __field(__u32, ls_id) + __field(__u32, lkb_id) + __field(int, mode) + __field(__u32, flags) + ), + + TP_fast_assign( + __entry->ls_id = ls->ls_global_id; + __entry->lkb_id = lkb->lkb_id; + __entry->mode = mode; + __entry->flags = flags; + ), + + TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s", + __entry->ls_id, __entry->lkb_id, + show_lock_mode(__entry->mode), + show_lock_flags(__entry->flags)) + +); + +TRACE_EVENT(dlm_lock_end, + + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, __u32 flags, + int error), + + TP_ARGS(ls, lkb, mode, flags, error), + + TP_STRUCT__entry( + __field(__u32, ls_id) + __field(__u32, lkb_id) + __field(int, mode) + __field(__u32, flags) + __field(int, error) + ), + + TP_fast_assign( + __entry->ls_id = ls->ls_global_id; + __entry->lkb_id = lkb->lkb_id; + __entry->mode = mode; + __entry->flags = flags; + + /* return value will be zeroed in those cases by dlm_lock() + * we do it here again to not introduce more overhead if + * trace isn't running and error reflects the return value. + */ + if (error == -EAGAIN || error == -EDEADLK) + __entry->error = 0; + else + __entry->error = error; + ), + + TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d", + __entry->ls_id, __entry->lkb_id, + show_lock_mode(__entry->mode), + show_lock_flags(__entry->flags), __entry->error) + +); + +TRACE_EVENT(dlm_bast, + + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode), + + TP_ARGS(ls, lkb, mode), + + TP_STRUCT__entry( + __field(__u32, ls_id) + __field(__u32, lkb_id) + __field(int, mode) + ), + + TP_fast_assign( + __entry->ls_id = ls->ls_global_id; + __entry->lkb_id = lkb->lkb_id; + __entry->mode = mode; + ), + + TP_printk("ls_id=%u lkb_id=%x mode=%s", __entry->ls_id, + __entry->lkb_id, show_lock_mode(__entry->mode)) + +); + +TRACE_EVENT(dlm_ast, + + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_lksb *lksb), + + TP_ARGS(ls, lkb, lksb), + + TP_STRUCT__entry( + __field(__u32, ls_id) + __field(__u32, lkb_id) + __field(u8, sb_flags) + __field(int, sb_status) + ), + + TP_fast_assign( + __entry->ls_id = ls->ls_global_id; + __entry->lkb_id = lkb->lkb_id; + __entry->sb_flags = lksb->sb_flags; + __entry->sb_status = lksb->sb_status; + ), + + TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d", + __entry->ls_id, __entry->lkb_id, + show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status) + +); + +/* note: we begin tracing dlm_unlock_start() only if ls and lkb are found */ +TRACE_EVENT(dlm_unlock_start, + + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags), + + TP_ARGS(ls, lkb, flags), + + TP_STRUCT__entry( + __field(__u32, ls_id) + __field(__u32, lkb_id) + __field(__u32, flags) + ), + + TP_fast_assign( + __entry->ls_id = ls->ls_global_id; + __entry->lkb_id = lkb->lkb_id; + __entry->flags = flags; + ), + + TP_printk("ls_id=%u lkb_id=%x flags=%s", + __entry->ls_id, __entry->lkb_id, + show_lock_flags(__entry->flags)) + +); + +TRACE_EVENT(dlm_unlock_end, + + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags, + int error), + + TP_ARGS(ls, lkb, flags, error), + + TP_STRUCT__entry( + __field(__u32, ls_id) + __field(__u32, lkb_id) + __field(__u32, flags) + __field(int, error) + ), + + TP_fast_assign( + __entry->ls_id = ls->ls_global_id; + __entry->lkb_id = lkb->lkb_id; + __entry->flags = flags; + __entry->error = error; + ), + + TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d", + __entry->ls_id, __entry->lkb_id, + show_lock_flags(__entry->flags), __entry->error) + +); + +#endif /* if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 92732376fd29462b502f41486bcef55f49c5713e Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Tue, 2 Nov 2021 15:17:16 -0400 Subject: fs: dlm: trace socket handling This patch adds tracepoints for dlm socket receive and send functionality. We can use it to track how much data was send or received to or from a specific nodeid. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/trace/events/dlm.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h index c97b4c163c3e..32088c603244 100644 --- a/include/trace/events/dlm.h +++ b/include/trace/events/dlm.h @@ -214,6 +214,46 @@ TRACE_EVENT(dlm_unlock_end, ); +TRACE_EVENT(dlm_send, + + TP_PROTO(int nodeid, int ret), + + TP_ARGS(nodeid, ret), + + TP_STRUCT__entry( + __field(int, nodeid) + __field(int, ret) + ), + + TP_fast_assign( + __entry->nodeid = nodeid; + __entry->ret = ret; + ), + + TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret) + +); + +TRACE_EVENT(dlm_recv, + + TP_PROTO(int nodeid, int ret), + + TP_ARGS(nodeid, ret), + + TP_STRUCT__entry( + __field(int, nodeid) + __field(int, ret) + ), + + TP_fast_assign( + __entry->nodeid = nodeid; + __entry->ret = ret; + ), + + TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret) + +); + #endif /* if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) */ /* This part must be outside protection */ -- cgit v1.2.3 From 7c7e3d31e7856a8260a254f8c71db416f7f9f5a1 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 5 Nov 2021 16:23:29 -0700 Subject: bpf: Introduce helper bpf_find_vma In some profiler use cases, it is necessary to map an address to the backing file, e.g., a shared library. bpf_find_vma helper provides a flexible way to achieve this. bpf_find_vma maps an address of a task to the vma (vm_area_struct) for this address, and feed the vma to an callback BPF function. The callback function is necessary here, as we need to ensure mmap_sem is unlocked. It is necessary to lock mmap_sem for find_vma. To lock and unlock mmap_sem safely when irqs are disable, we use the same mechanism as stackmap with build_id. Specifically, when irqs are disabled, the unlocked is postponed in an irq_work. Refactor stackmap.c so that the irq_work is shared among bpf_find_vma and stackmap helpers. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Tested-by: Hengqi Chen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211105232330.1936330-2-songliubraving@fb.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2be6dfd68df9..df3410bff4b0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2157,6 +2157,7 @@ extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; +extern const struct bpf_func_proto bpf_find_vma_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ba5af15e25f5..509eee5f0393 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4938,6 +4938,25 @@ union bpf_attr { * **-ENOENT** if symbol is not found. * * **-EPERM** if caller does not have permission to obtain kernel address. + * + * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * Find vma of *task* that contains *addr*, call *callback_fn* + * function with *task*, *vma*, and *callback_ctx*. + * The *callback_fn* should be a static function and + * the *callback_ctx* should be a pointer to the stack. + * The *flags* is used to control certain aspects of the helper. + * Currently, the *flags* must be 0. + * + * The expected callback signature is + * + * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); + * + * Return + * 0 on success. + * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. + * **-EBUSY** if failed to try lock mmap_lock. + * **-EINVAL** for invalid **flags**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5120,6 +5139,7 @@ union bpf_attr { FN(trace_vprintk), \ FN(skc_to_unix_sock), \ FN(kallsyms_lookup_name), \ + FN(find_vma), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From fa443bc3c1e4b28d9315dea882e8358ba6e26f8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 29 Oct 2021 17:28:56 +0200 Subject: HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows to selectively autoload drivers for ISH devices. Currently all ISH drivers are loaded for all systems having any ISH device. Signed-off-by: Thomas Weißschuh Acked-by: Srinivas Pandruvada Acked-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/mod_devicetable.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index ae2e75d15b21..befbf53c4b7c 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -895,4 +895,17 @@ struct dfl_device_id { kernel_ulong_t driver_data; }; +/* ISHTP (Integrated Sensor Hub Transport Protocol) */ + +#define ISHTP_MODULE_PREFIX "ishtp:" + +/** + * struct ishtp_device_id - ISHTP device identifier + * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba + * @context: pointer to driver specific data + */ +struct ishtp_device_id { + guid_t guid; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ -- cgit v1.2.3 From 64355db3caf6468dc711995239efe0cbcd7d0091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 10 Nov 2021 13:16:55 +0100 Subject: mod_devicetable: fix kdocs for ishtp_device_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kdocs were copied from another device_id struct and not adapted. Fixes: fa443bc3c1e4 ("HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()") Signed-off-by: Thomas Weißschuh Reported-by: Stephen Rothwell Signed-off-by: Jiri Kosina --- include/linux/mod_devicetable.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index befbf53c4b7c..c70abe7aaef2 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -901,8 +901,7 @@ struct dfl_device_id { /** * struct ishtp_device_id - ISHTP device identifier - * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba - * @context: pointer to driver specific data + * @guid: GUID of the device. */ struct ishtp_device_id { guid_t guid; -- cgit v1.2.3 From 5d5e4522a7f404d1a96fd6c703989d32a9c9568d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 7 Nov 2021 14:51:16 +1000 Subject: printk: restore flushing of NMI buffers on remote CPUs after NMI backtraces printk from NMI context relies on irq work being raised on the local CPU to print to console. This can be a problem if the NMI was raised by a lockup detector to print lockup stack and regs, because the CPU may not enable irqs (because it is locked up). Introduce printk_trigger_flush() that can be called another CPU to try to get those messages to the console, call that where printk_safe_flush was previously called. Fixes: 93d102f094be ("printk: remove safe buffers") Cc: stable@vger.kernel.org # 5.15 Signed-off-by: Nicholas Piggin Reviewed-by: Petr Mladek Reviewed-by: John Ogness Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20211107045116.1754411-1-npiggin@gmail.com --- include/linux/printk.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index a1379df43251..596ad6fa0336 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -206,6 +206,7 @@ void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; +void printk_trigger_flush(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -282,6 +283,9 @@ static inline void dump_stack_lvl(const char *log_lvl) static inline void dump_stack(void) { } +static inline void printk_trigger_flush(void) +{ +} #endif #ifdef CONFIG_SMP -- cgit v1.2.3 From f89315650ba34ec6c91a8bded72796980bee2a4d Mon Sep 17 00:00:00 2001 From: Mark Pashmfouroush Date: Wed, 10 Nov 2021 11:10:15 +0000 Subject: bpf: Add ingress_ifindex to bpf_sk_lookup It may be helpful to have access to the ifindex during bpf socket lookup. An example may be to scope certain socket lookup logic to specific interfaces, i.e. an interface may be made exempt from custom lookup code. Add the ifindex of the arriving connection to the bpf_sk_lookup API. Signed-off-by: Mark Pashmfouroush Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211110111016.5670-2-markpash@cloudflare.com --- include/linux/filter.h | 7 +++++-- include/uapi/linux/bpf.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 24b7ed2677af..b6a216eb217a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1374,6 +1374,7 @@ struct bpf_sk_lookup_kern { const struct in6_addr *daddr; } v6; struct sock *selected_sk; + u32 ingress_ifindex; bool no_reuseport; }; @@ -1436,7 +1437,7 @@ extern struct static_key_false bpf_sk_lookup_enabled; static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 dport, - struct sock **psk) + const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; @@ -1452,6 +1453,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, .v4.daddr = daddr, .sport = sport, .dport = dport, + .ingress_ifindex = ifindex, }; u32 act; @@ -1474,7 +1476,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, const __be16 sport, const struct in6_addr *daddr, const u16 dport, - struct sock **psk) + const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; @@ -1490,6 +1492,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, .v6.daddr = daddr, .sport = sport, .dport = dport, + .ingress_ifindex = ifindex, }; u32 act; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 509eee5f0393..6297eafdc40f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6316,6 +6316,7 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ + __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ }; /* -- cgit v1.2.3 From a25efb3863d068929f0bbeb87a995df11507e691 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 23 Sep 2021 13:57:42 +0200 Subject: dma-buf: add dma_fence_describe and dma_resv_describe v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add functions to dump dma_fence and dma_resv objects into a seq_file and use them for printing the debugfs information. v2: fix missing include reported by test robot. Signed-off-by: Christian König Reviewed-by: Rob Clark Link: https://patchwork.freedesktop.org/patch/msgid/20211103081231.18578-2-christian.koenig@amd.com --- include/linux/dma-fence.h | 1 + include/linux/dma-resv.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index a706b7bf51d7..1ea691753bd3 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -264,6 +264,7 @@ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, void dma_fence_release(struct kref *kref); void dma_fence_free(struct dma_fence *fence); +void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq); /** * dma_fence_put - decreases refcount of the fence diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index dbd235ab447f..09c6063b199a 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -490,5 +490,6 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout); bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all); +void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq); #endif /* _LINUX_RESERVATION_H */ -- cgit v1.2.3 From 396d9b9a48723995416a05f0dab80c1dd6c21e80 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 8 Nov 2021 11:28:46 +0100 Subject: drm: Update documentation and TODO of gem_prime_mmap hook The hook gem_prime_mmap in struct drm_driver is deprecated. Document the new requirements. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20211108102846.309-4-tzimmermann@suse.de --- include/drm/drm_drv.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 0cd95953cdf5..a84eb4028e5b 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -345,11 +345,14 @@ struct drm_driver { * mmap hook for GEM drivers, used to implement dma-buf mmap in the * PRIME helpers. * - * FIXME: There's way too much duplication going on here, and also moved - * to &drm_gem_object_funcs. + * This hook only exists for historical reasons. Drivers must use + * drm_gem_prime_mmap() to implement it. + * + * FIXME: Convert all drivers to implement mmap in struct + * &drm_gem_object_funcs and inline drm_gem_prime_mmap() into + * its callers. This hook should be removed afterwards. */ - int (*gem_prime_mmap)(struct drm_gem_object *obj, - struct vm_area_struct *vma); + int (*gem_prime_mmap)(struct drm_gem_object *obj, struct vm_area_struct *vma); /** * @dumb_create: -- cgit v1.2.3 From 452290f354f04fca53cba810bd241f4307502f1e Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 10 Nov 2021 11:36:54 +0100 Subject: drm/format-helper: Export drm_fb_clip_offset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a function that computes the offset into a blit destination buffer. This will allow to move destination-buffer clipping into the format-helper callers. v4: * add missing '@' for parameter documentation * fix typo 'frambuffer' v2: * provide documentation (Sam) * return 'unsigned int' (Sam, Noralf) Signed-off-by: Thomas Zimmermann Reviewed-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-2-tzimmermann@suse.de --- include/drm/drm_format_helper.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index e86925cf07b9..f5a8b334b18d 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -6,9 +6,13 @@ #ifndef __LINUX_DRM_FORMAT_HELPER_H #define __LINUX_DRM_FORMAT_HELPER_H +struct drm_format_info; struct drm_framebuffer; struct drm_rect; +unsigned int drm_fb_clip_offset(unsigned int pitch, const struct drm_format_info *format, + const struct drm_rect *clip); + void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip); void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr, -- cgit v1.2.3 From 27bd66dd6419c45e320f34ed419cd80833de1161 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 10 Nov 2021 11:36:55 +0100 Subject: drm/format-helper: Rework format-helper memcpy functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move destination-buffer clipping from all format-helper memcpy function into callers. Support destination-buffer pitch. Only distinguish between system and I/O memory, but use same logic everywhere. Signed-off-by: Thomas Zimmermann Tested-by: Noralf Trønnes Reviewed-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-3-tzimmermann@suse.de --- include/drm/drm_format_helper.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index f5a8b334b18d..1fc3ba7b6060 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -13,11 +13,10 @@ struct drm_rect; unsigned int drm_fb_clip_offset(unsigned int pitch, const struct drm_format_info *format, const struct drm_rect *clip); -void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip); -void drm_fb_memcpy_dstclip(void __iomem *dst, unsigned int dst_pitch, void *vaddr, - struct drm_framebuffer *fb, - struct drm_rect *clip); +void drm_fb_memcpy(void *dst, unsigned int dst_pitch, const void *vaddr, + const struct drm_framebuffer *fb, const struct drm_rect *clip); +void drm_fb_memcpy_toio(void __iomem *dst, unsigned int dst_pitch, const void *vaddr, + const struct drm_framebuffer *fb, const struct drm_rect *clip); void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb, struct drm_rect *clip, bool cached); void drm_fb_xrgb8888_to_rgb332(void *dst, void *vaddr, struct drm_framebuffer *fb, -- cgit v1.2.3 From 3e3543c8a19cade02cefde83ee97233d5d90e7e5 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 10 Nov 2021 11:36:56 +0100 Subject: drm/format-helper: Add destination-buffer pitch to drm_fb_swab() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add destination-buffer pitch as argument to drm_fb_swab(). Done for consistency with the rest of the interface. v2: * update documentation (Noralf) Signed-off-by: Thomas Zimmermann Tested-by: Noralf Trønnes Reviewed-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-4-tzimmermann@suse.de --- include/drm/drm_format_helper.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index 1fc3ba7b6060..ddcba5abe306 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -17,8 +17,9 @@ void drm_fb_memcpy(void *dst, unsigned int dst_pitch, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip); void drm_fb_memcpy_toio(void __iomem *dst, unsigned int dst_pitch, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip); -void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb, - struct drm_rect *clip, bool cached); +void drm_fb_swab(void *dst, unsigned int dst_pitch, const void *src, + const struct drm_framebuffer *fb, const struct drm_rect *clip, + bool cached); void drm_fb_xrgb8888_to_rgb332(void *dst, void *vaddr, struct drm_framebuffer *fb, struct drm_rect *clip); void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr, -- cgit v1.2.3 From 53bc2098d2b6ccff25fe13f9345cbb5c0ef34a99 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 10 Nov 2021 11:36:57 +0100 Subject: drm/format-helper: Rework format-helper conversion functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move destination-buffer clipping from all format-helper conversion functions into callers. Support destination-buffer pitch. Only distinguish between system and I/O memory, but use same logic everywhere. Simply harmonize the interface and semantics of the existing code. Not all conversion helpers support all combinations of parameters. We have to add additional features when we need them. v2: * fix default destination pitch in drm_fb_xrgb8888_to_gray8() (Noralf) Signed-off-by: Thomas Zimmermann Tested-by: Noralf Trønnes Reviewed-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-5-tzimmermann@suse.de --- include/drm/drm_format_helper.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index ddcba5abe306..6b366f422a24 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -20,21 +20,21 @@ void drm_fb_memcpy_toio(void __iomem *dst, unsigned int dst_pitch, const void *v void drm_fb_swab(void *dst, unsigned int dst_pitch, const void *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool cached); -void drm_fb_xrgb8888_to_rgb332(void *dst, void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip); -void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr, - struct drm_framebuffer *fb, - struct drm_rect *clip, bool swab); -void drm_fb_xrgb8888_to_rgb565_dstclip(void __iomem *dst, unsigned int dst_pitch, - void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip, bool swab); -void drm_fb_xrgb8888_to_rgb888(void *dst, void *src, struct drm_framebuffer *fb, - struct drm_rect *clip); -void drm_fb_xrgb8888_to_rgb888_dstclip(void __iomem *dst, unsigned int dst_pitch, - void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip); -void drm_fb_xrgb8888_to_gray8(u8 *dst, void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip); +void drm_fb_xrgb8888_to_rgb332(void *dst, unsigned int dst_pitch, const void *vaddr, + const struct drm_framebuffer *fb, const struct drm_rect *clip); +void drm_fb_xrgb8888_to_rgb565(void *dst, unsigned int dst_pitch, const void *vaddr, + const struct drm_framebuffer *fb, const struct drm_rect *clip, + bool swab); +void drm_fb_xrgb8888_to_rgb565_toio(void __iomem *dst, unsigned int dst_pitch, + const void *vaddr, const struct drm_framebuffer *fb, + const struct drm_rect *clip, bool swab); +void drm_fb_xrgb8888_to_rgb888(void *dst, unsigned int dst_pitch, const void *src, + const struct drm_framebuffer *fb, const struct drm_rect *clip); +void drm_fb_xrgb8888_to_rgb888_toio(void __iomem *dst, unsigned int dst_pitch, + const void *vaddr, const struct drm_framebuffer *fb, + const struct drm_rect *clip); +void drm_fb_xrgb8888_to_gray8(void *dst, unsigned int dst_pitch, const void *vaddr, + const struct drm_framebuffer *fb, const struct drm_rect *clip); int drm_fb_blit_rect_dstclip(void __iomem *dst, unsigned int dst_pitch, uint32_t dst_format, void *vmap, -- cgit v1.2.3 From 19b20a8021315fce38ae95607e5de389913471a7 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 10 Nov 2021 11:36:58 +0100 Subject: drm/format-helper: Streamline blit-helper interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move destination-buffer clipping from format-helper blit function into caller. Rename drm_fb_blit_rect_dstclip() to drm_fb_blit_toio(). Done for consistency with the rest of the interface. Remove drm_fb_blit_dstclip(), which isn't required. Signed-off-by: Thomas Zimmermann Reviewed-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-6-tzimmermann@suse.de --- include/drm/drm_format_helper.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index 6b366f422a24..97e4c3223af3 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -36,12 +36,8 @@ void drm_fb_xrgb8888_to_rgb888_toio(void __iomem *dst, unsigned int dst_pitch, void drm_fb_xrgb8888_to_gray8(void *dst, unsigned int dst_pitch, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip); -int drm_fb_blit_rect_dstclip(void __iomem *dst, unsigned int dst_pitch, - uint32_t dst_format, void *vmap, - struct drm_framebuffer *fb, - struct drm_rect *rect); -int drm_fb_blit_dstclip(void __iomem *dst, unsigned int dst_pitch, - uint32_t dst_format, void *vmap, - struct drm_framebuffer *fb); +int drm_fb_blit_toio(void __iomem *dst, unsigned int dst_pitch, uint32_t dst_format, + const void *vmap, const struct drm_framebuffer *fb, + const struct drm_rect *rect); #endif /* __LINUX_DRM_FORMAT_HELPER_H */ -- cgit v1.2.3 From 9239f3e1807c282e3c6bced510640910e9b25c60 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 10 Nov 2021 11:37:01 +0100 Subject: drm/simpledrm: Support virtual screen sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add constants for the maximum size of the shadow-plane surface size. Useful for shadow planes with virtual screen sizes. The current sizes are 4096 scanlines with 4096 pixels each. This seems reasonable for current hardware, but can be increased as necessary. In simpledrm, set the maximum framebuffer size from the constants for shadow planes. Implements support for virtual screen sizes and page flipping on the fbdev console. v3: * use decimal numbers for shadow-plane constants (Noralf) Signed-off-by: Thomas Zimmermann Acked-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-9-tzimmermann@suse.de --- include/drm/drm_gem_atomic_helper.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gem_atomic_helper.h b/include/drm/drm_gem_atomic_helper.h index 48222a107873..0b1e2dd2ac3f 100644 --- a/include/drm/drm_gem_atomic_helper.h +++ b/include/drm/drm_gem_atomic_helper.h @@ -22,6 +22,24 @@ int drm_gem_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, * Helpers for planes with shadow buffers */ +/** + * DRM_SHADOW_PLANE_MAX_WIDTH - Maximum width of a plane's shadow buffer in pixels + * + * For drivers with shadow planes, the maximum width of the framebuffer is + * usually independent from hardware limitations. Drivers can initialize struct + * drm_mode_config.max_width from DRM_SHADOW_PLANE_MAX_WIDTH. + */ +#define DRM_SHADOW_PLANE_MAX_WIDTH (4096u) + +/** + * DRM_SHADOW_PLANE_MAX_HEIGHT - Maximum height of a plane's shadow buffer in scanlines + * + * For drivers with shadow planes, the maximum height of the framebuffer is + * usually independent from hardware limitations. Drivers can initialize struct + * drm_mode_config.max_height from DRM_SHADOW_PLANE_MAX_HEIGHT. + */ +#define DRM_SHADOW_PLANE_MAX_HEIGHT (4096u) + /** * struct drm_shadow_plane_state - plane state for planes with shadow buffers * -- cgit v1.2.3 From 84e9dfd5185285df55ae9068c89cde1a88baa7b7 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 10 Nov 2021 11:37:02 +0100 Subject: drm: Clarify semantics of struct drm_mode_config.{min, max}_{width, height} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add additional information on the semantics of the size fields in struct drm_mode_config. Also add a TODO to review all driver for correct usage of these fields. Signed-off-by: Thomas Zimmermann Acked-by: Noralf Trønnes Link: https://patchwork.freedesktop.org/patch/msgid/20211110103702.374-10-tzimmermann@suse.de --- include/drm/drm_mode_config.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 48b7de80daf5..91ca575a78de 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -359,6 +359,19 @@ struct drm_mode_config_funcs { * Core mode resource tracking structure. All CRTC, encoders, and connectors * enumerated by the driver are added here, as are global properties. Some * global restrictions are also here, e.g. dimension restrictions. + * + * Framebuffer sizes refer to the virtual screen that can be displayed by + * the CRTC. This can be different from the physical resolution programmed. + * The minimum width and height, stored in @min_width and @min_height, + * describe the smallest size of the framebuffer. It correlates to the + * minimum programmable resolution. + * The maximum width, stored in @max_width, is typically limited by the + * maximum pitch between two adjacent scanlines. The maximum height, stored + * in @max_height, is usually only limited by the amount of addressable video + * memory. For hardware that has no real maximum, drivers should pick a + * reasonable default. + * + * See also @DRM_SHADOW_PLANE_MAX_WIDTH and @DRM_SHADOW_PLANE_MAX_HEIGHT. */ struct drm_mode_config { /** -- cgit v1.2.3 From bf9167a8b40c9cf463521da05342db81808c1b6e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Nov 2021 09:56:33 +0100 Subject: HID: intel-ish-hid: fix module device-id handling A late addititon to the intel-ish-hid framework caused a build failure with clang, and introduced an ABI to the module loader that stops working if any driver ever needs to bind to more than one UUID: drivers/hid/intel-ish-hid/ishtp-fw-loader.c:1067:4: error: initializer element is not a compile-time constant Change the ishtp_device_id to have correct documentation and a driver_data field like all the other ones, and change the drivers to use the ID table as the primary identification in a way that works with all compilers and avoids duplciating the identifiers. Fixes: f155dfeaa4ee ("platform/x86: isthp_eclite: only load for matching devices") Fixes: facfe0a4fdce ("platform/chrome: chros_ec_ishtp: only load for matching devices") Fixes: 0d0cccc0fd83 ("HID: intel-ish-hid: hid-client: only load for matching devices") Fixes: 44e2a58cb880 ("HID: intel-ish-hid: fw-loader: only load for matching devices") Fixes: cb1a2c6847f7 ("HID: intel-ish-hid: use constants for modaliases") Fixes: fa443bc3c1e4 ("HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()") Signed-off-by: Arnd Bergmann Reviewed-by: Hans de Goede [jkosina@suse.cz: fix ecl_ishtp_cl_driver.id initialization] [jkosina@suse.cz: fix conflict with already fixed kerneldoc] Signed-off-by: Jiri Kosina --- include/linux/intel-ish-client-if.h | 4 ++-- include/linux/mod_devicetable.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index aee8ff4739b1..f45f13304add 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -9,7 +9,7 @@ #define _INTEL_ISH_CLIENT_IF_H_ #include -#include +#include struct ishtp_cl_device; struct ishtp_device; @@ -40,7 +40,7 @@ enum cl_state { struct ishtp_cl_driver { struct device_driver driver; const char *name; - const guid_t *guid; + const struct ishtp_device_id *id; int (*probe)(struct ishtp_cl_device *dev); void (*remove)(struct ishtp_cl_device *dev); int (*reset)(struct ishtp_cl_device *dev); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index c70abe7aaef2..4bb71979a8fd 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -902,9 +902,11 @@ struct dfl_device_id { /** * struct ishtp_device_id - ISHTP device identifier * @guid: GUID of the device. + * @driver_data: pointer to driver specific data */ struct ishtp_device_id { guid_t guid; + kernel_ulong_t driver_data; }; #endif /* LINUX_MOD_DEVICETABLE_H */ -- cgit v1.2.3 From 8c42d2fa4eeab6c37a0b1b1aa7a2715248ef4f34 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 11 Nov 2021 17:26:09 -0800 Subject: bpf: Support BTF_KIND_TYPE_TAG for btf_type_tag attributes LLVM patches ([1] for clang, [2] and [3] for BPF backend) added support for btf_type_tag attributes. This patch added support for the kernel. The main motivation for btf_type_tag is to bring kernel annotations __user, __rcu etc. to btf. With such information available in btf, bpf verifier can detect mis-usages and reject the program. For example, for __user tagged pointer, developers can then use proper helper like bpf_probe_read_user() etc. to read the data. BTF_KIND_TYPE_TAG may also useful for other tracing facility where instead of to require user to specify kernel/user address type, the kernel can detect it by itself with btf. [1] https://reviews.llvm.org/D111199 [2] https://reviews.llvm.org/D113222 [3] https://reviews.llvm.org/D113496 Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211112012609.1505032-1-yhs@fb.com --- include/uapi/linux/btf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index deb12f755f0f..b0d8fea1951d 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and DECL_TAG. + * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. * "type" is a type_id referring to another type. */ union { @@ -75,6 +75,7 @@ enum { BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ + BTF_KIND_TYPE_TAG = 18, /* Type Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, -- cgit v1.2.3 From 9e2ad638ae3632ef916ceb39f70e3104bf8fdc97 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 12 Nov 2021 07:02:42 -0800 Subject: bpf: Extend BTF_ID_LIST_GLOBAL with parameter for number of IDs syzbot reported the following BUG w/o CONFIG_DEBUG_INFO_BTF BUG: KASAN: global-out-of-bounds in task_iter_init+0x212/0x2e7 kernel/bpf/task_iter.c:661 Read of size 4 at addr ffffffff90297404 by task swapper/0/1 CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.15.0-syzkaller #0 Hardware name: ... Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xf/0x309 mm/kasan/report.c:256 __kasan_report mm/kasan/report.c:442 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:459 task_iter_init+0x212/0x2e7 kernel/bpf/task_iter.c:661 do_one_initcall+0x103/0x650 init/main.c:1295 do_initcall_level init/main.c:1368 [inline] do_initcalls init/main.c:1384 [inline] do_basic_setup init/main.c:1403 [inline] kernel_init_freeable+0x6b1/0x73a init/main.c:1606 kernel_init+0x1a/0x1d0 init/main.c:1497 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 This is caused by hard-coded name[1] in BTF_ID_LIST_GLOBAL (w/o CONFIG_DEBUG_INFO_BTF). Fix this by adding a parameter n to BTF_ID_LIST_GLOBAL. This avoids ifdef CONFIG_DEBUG_INFO_BTF in btf.c and filter.c. Fixes: 7c7e3d31e785 ("bpf: Introduce helper bpf_find_vma") Reported-by: syzbot+e0d81ec552a21d9071aa@syzkaller.appspotmail.com Reported-by: Eric Dumazet Suggested-by: Eric Dumazet Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211112150243.1270987-2-songliubraving@fb.com --- include/linux/btf_ids.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 47d9abfbdb55..6bb42b785293 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -73,7 +73,7 @@ asm( \ __BTF_ID_LIST(name, local) \ extern u32 name[]; -#define BTF_ID_LIST_GLOBAL(name) \ +#define BTF_ID_LIST_GLOBAL(name, n) \ __BTF_ID_LIST(name, globl) /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with @@ -83,7 +83,7 @@ __BTF_ID_LIST(name, globl) BTF_ID_LIST(name) \ BTF_ID(prefix, typename) #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \ - BTF_ID_LIST_GLOBAL(name) \ + BTF_ID_LIST_GLOBAL(name, 1) \ BTF_ID(prefix, typename) /* @@ -149,7 +149,7 @@ extern struct btf_id_set name; #define BTF_ID_LIST(name) static u32 name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED -#define BTF_ID_LIST_GLOBAL(name) u32 name[1]; +#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n]; #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1]; #define BTF_SET_START(name) static struct btf_id_set name = { 0 }; -- cgit v1.2.3 From d19ddb476a539fd78ad1028ae13bb38506286931 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 12 Nov 2021 07:02:43 -0800 Subject: bpf: Introduce btf_tracing_ids Similar to btf_sock_ids, btf_tracing_ids provides btf ID for task_struct, file, and vm_area_struct via easy to understand format like btf_tracing_ids[BTF_TRACING_TYPE_[TASK|file|VMA]]. Suggested-by: Alexei Starovoitov Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211112150243.1270987-3-songliubraving@fb.com --- include/linux/btf_ids.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 6bb42b785293..919c0fde1c51 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -189,6 +189,18 @@ MAX_BTF_SOCK_TYPE, extern u32 btf_sock_ids[]; #endif -extern u32 btf_task_struct_ids[]; +#define BTF_TRACING_TYPE_xxx \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct) + +enum { +#define BTF_TRACING_TYPE(name, type) name, +BTF_TRACING_TYPE_xxx +#undef BTF_TRACING_TYPE +MAX_BTF_TRACING_TYPE, +}; + +extern u32 btf_tracing_ids[]; #endif -- cgit v1.2.3 From 1aa3b2207e889a948049c9a8016cedb0218c2389 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 12 Nov 2021 18:18:10 -0500 Subject: net,lsm,selinux: revert the security_sctp_assoc_established() hook This patch reverts two prior patches, e7310c94024c ("security: implement sctp_assoc_established hook in selinux") and 7c2ef0240e6a ("security: add sctp_assoc_established hook"), which create the security_sctp_assoc_established() LSM hook and provide a SELinux implementation. Unfortunately these two patches were merged without proper review (the Reviewed-by and Tested-by tags from Richard Haines were for previous revisions of these patches that were significantly different) and there are outstanding objections from the SELinux maintainers regarding these patches. Work is currently ongoing to correct the problems identified in the reverted patches, as well as others that have come up during review, but it is unclear at this point in time when that work will be ready for inclusion in the mainline kernel. In the interest of not keeping objectionable code in the kernel for multiple weeks, and potentially a kernel release, we are reverting the two problematic patches. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/lsm_hook_defs.h | 2 -- include/linux/lsm_hooks.h | 5 ----- include/linux/security.h | 7 ------- 3 files changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 442a611fa0fb..df8de62f4710 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -335,8 +335,6 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) -LSM_HOOK(void, LSM_RET_VOID, sctp_assoc_established, struct sctp_association *asoc, - struct sk_buff *skb) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index d6823214d5c1..d45b6f6e27fd 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1050,11 +1050,6 @@ * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. * @newsk pointer to new sock structure. - * @sctp_assoc_established: - * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet - * to the security module. - * @asoc pointer to sctp association structure. - * @skb pointer to skbuff of association packet. * * Security hooks for Infiniband * diff --git a/include/linux/security.h b/include/linux/security.h index 06eac4e61a13..bbf44a466832 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1430,8 +1430,6 @@ int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); -void security_sctp_assoc_established(struct sctp_association *asoc, - struct sk_buff *skb); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -1651,11 +1649,6 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *newsk) { } - -static inline void security_sctp_assoc_established(struct sctp_association *asoc, - struct sk_buff *skb) -{ -} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND -- cgit v1.2.3 From 938aa33f14657c9ed9deea348b7d6f14b6d69cb7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 14 Nov 2021 13:28:34 -0500 Subject: tracing: Add length protection to histogram string copies The string copies to the histogram storage has a max size of 256 bytes (defined by MAX_FILTER_STR_VAL). Only the string size of the event field needs to be copied to the event storage, but no more than what is in the event storage. Although nothing should be bigger than 256 bytes, there's no protection against overwriting of the storage if one day there is. Copy no more than the destination size, and enforce it. Also had to turn MAX_FILTER_STR_VAL into an unsigned int, to keep the min() comparison of the string sizes of comparable types. Link: https://lore.kernel.org/all/CAHk-=wjREUihCGrtRBwfX47y_KrLCGjiq3t6QtoNJpmVrAEb1w@mail.gmail.com/ Link: https://lkml.kernel.org/r/20211114132834.183429a4@rorschach.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: Tom Zanussi Reported-by: Linus Torvalds Reviewed-by: Masami Hiramatsu Fixes: 63f84ae6b82b ("tracing/histogram: Do not copy the fixed-size char array field over the field size") Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 50453b287615..2d167ac3452c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -673,7 +673,7 @@ struct trace_event_file { #define PERF_MAX_TRACE_SIZE 8192 -#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ +#define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */ enum event_trigger_type { ETT_NONE = (0), -- cgit v1.2.3 From 0a08088f82c2db9cd6bce3258c79a76980c77651 Mon Sep 17 00:00:00 2001 From: Dillon Min Date: Tue, 19 Oct 2021 09:43:18 +0100 Subject: media: v4l2-mem2mem: add v4l2_m2m_get_unmapped_area for no-mmu platform For platforms without MMU the m2m provides a helper method v4l2_m2m_get_unmapped_area(), The mmap() routines will call this to get a proposed address for the mapping. More detailed information about get_unmapped_area can be found in Documentation/nommu-mmap.txt Signed-off-by: Dillon Min Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-mem2mem.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h index 5a91b548ecc0..fdbd5257e020 100644 --- a/include/media/v4l2-mem2mem.h +++ b/include/media/v4l2-mem2mem.h @@ -495,6 +495,11 @@ __poll_t v4l2_m2m_poll(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, int v4l2_m2m_mmap(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct vm_area_struct *vma); +#ifndef CONFIG_MMU +unsigned long v4l2_m2m_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); +#endif /** * v4l2_m2m_init() - initialize per-driver m2m data * -- cgit v1.2.3 From ef9f18a9e3a04126cf017216193166abe03dedef Mon Sep 17 00:00:00 2001 From: Dillon Min Date: Tue, 19 Oct 2021 09:43:21 +0100 Subject: media: v4l2-ctrls: Add RGB color effects control Add V4L2_COLORFX_SET_RGB color effects control, V4L2_CID_COLORFX_RGB for RGB color setting. with two mirror changes: - change 0xFFFFFF to 0xffffff - fix comments 2^24 to 2^24 - 1 [hverkuil: dropped spaces around + with V4L2_CID_BASE for consistency] Signed-off-by: Dillon Min Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 5fea5feb0412..c9eea93a43a9 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -128,6 +128,7 @@ enum v4l2_colorfx { V4L2_COLORFX_SOLARIZATION = 13, V4L2_COLORFX_ANTIQUE = 14, V4L2_COLORFX_SET_CBCR = 15, + V4L2_COLORFX_SET_RGB = 16, }; #define V4L2_CID_AUTOBRIGHTNESS (V4L2_CID_BASE+32) #define V4L2_CID_BAND_STOP_FILTER (V4L2_CID_BASE+33) @@ -145,9 +146,10 @@ enum v4l2_colorfx { #define V4L2_CID_ALPHA_COMPONENT (V4L2_CID_BASE+41) #define V4L2_CID_COLORFX_CBCR (V4L2_CID_BASE+42) +#define V4L2_CID_COLORFX_RGB (V4L2_CID_BASE+43) /* last CID + 1 */ -#define V4L2_CID_LASTP1 (V4L2_CID_BASE+43) +#define V4L2_CID_LASTP1 (V4L2_CID_BASE+44) /* USER-class private control IDs */ -- cgit v1.2.3 From 4c7924fb905b02323ff6d9d20f370892615dccfa Mon Sep 17 00:00:00 2001 From: Julien Massot Date: Fri, 22 Oct 2021 14:21:01 +0200 Subject: soc: renesas: rcar-rst: Add support to set rproc boot address R-Car Gen3 SoC series has a realtime processor, the boot address of this processor can be set thanks to CR7BAR register of the reset module. Export this function so that it's possible to set the boot address from a remoteproc driver. Also drop the __initdata qualifier on rcar_rst_base, since we will use this address later than init time. Signed-off-by: Julien Massot Link: https://lore.kernel.org/r/20211022122101.66998-1-julien.massot@iot.bzh Signed-off-by: Geert Uytterhoeven --- include/linux/soc/renesas/rcar-rst.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h index 7899a5b8c247..1f1fe8bfaa76 100644 --- a/include/linux/soc/renesas/rcar-rst.h +++ b/include/linux/soc/renesas/rcar-rst.h @@ -4,8 +4,10 @@ #ifdef CONFIG_RST_RCAR int rcar_rst_read_mode_pins(u32 *mode); +int rcar_rst_set_rproc_boot_addr(u64 boot_addr); #else static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; } +static inline int rcar_rst_set_rproc_boot_addr(u64 boot_addr) { return -ENODEV; } #endif #endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */ -- cgit v1.2.3 From 032816fbbfafe3198bb5c71fbbe4e8e5be33b352 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 27 Oct 2021 14:45:07 +0100 Subject: pinctrl: pinconf-generic: Add support for "output-impedance-ohms" to be extracted from DT files Add "output-impedance-ohms" property to generic options used for DT parsing files. This enables drivers, which use generic pin configurations, to get the value passed to this property. Signed-off-by: Lad Prabhakar Reviewed-by: Biju Das Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20211027134509.5036-3-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/linux/pinctrl/pinconf-generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index eee0e3948537..2422211d6a5a 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -91,6 +91,8 @@ struct pinctrl_map; * configuration (eg. the currently selected mux function) drive values on * the line. Use argument 1 to enable output mode, argument 0 to disable * it. + * @PIN_CONFIG_OUTPUT_IMPEDANCE_OHMS: this will configure the output impedance + * of the pin with the value passed as argument. The argument is in ohms. * @PIN_CONFIG_PERSIST_STATE: retain pin state across sleep or controller reset * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power * supplies, the argument to this parameter (on a custom format) tells @@ -129,6 +131,7 @@ enum pin_config_param { PIN_CONFIG_MODE_PWM, PIN_CONFIG_OUTPUT, PIN_CONFIG_OUTPUT_ENABLE, + PIN_CONFIG_OUTPUT_IMPEDANCE_OHMS, PIN_CONFIG_PERSIST_STATE, PIN_CONFIG_POWER_SOURCE, PIN_CONFIG_SKEW_DELAY, -- cgit v1.2.3 From 5a363c20673308e968b6640deb73d7bf77e8b463 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 8 Nov 2021 10:31:47 +0100 Subject: drm/shmem-helper: Unexport drm_gem_shmem_create_with_handle() Turn drm_gem_shmem_create_with_handle() into an internal helper function. It's not used outside of the compilation unit. Signed-off-by: Thomas Zimmermann Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20211108093149.7226-2-tzimmermann@suse.de --- include/drm/drm_gem_shmem_helper.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 434328d8a0d9..6b47eb7d9f76 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -128,11 +128,6 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem void drm_gem_shmem_purge_locked(struct drm_gem_object *obj); bool drm_gem_shmem_purge(struct drm_gem_object *obj); -struct drm_gem_shmem_object * -drm_gem_shmem_create_with_handle(struct drm_file *file_priv, - struct drm_device *dev, size_t size, - uint32_t *handle); - int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args); -- cgit v1.2.3 From c7fbcb7149ff9321bbbcc93c9920de534ea8102c Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 8 Nov 2021 10:31:48 +0100 Subject: drm/shmem-helper: Export dedicated wrappers for GEM object functions Wrap GEM SHMEM functions for struct drm_gem_object_funcs and update all callers. This will allow for an update of the public interfaces of the GEM SHMEM helper library. v2: * fix docs for drm_gem_shmem_object_print_info() Signed-off-by: Thomas Zimmermann Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20211108093149.7226-3-tzimmermann@suse.de --- include/drm/drm_gem_shmem_helper.h | 120 +++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 6b47eb7d9f76..4199877ae588 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -137,6 +137,126 @@ void drm_gem_shmem_print_info(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *obj); struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj); + +/* + * GEM object functions + */ + +/** + * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free_object() + * @obj: GEM object to free + * + * This function wraps drm_gem_shmem_free_object(). Drivers that employ the shmem helpers + * should use it as their &drm_gem_object_funcs.free handler. + */ +static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj) +{ + drm_gem_shmem_free_object(obj); +} + +/** + * drm_gem_shmem_object_print_info() - Print &drm_gem_shmem_object info for debugfs + * @p: DRM printer + * @indent: Tab indentation level + * @obj: GEM object + * + * This function wraps drm_gem_shmem_print_info(). Drivers that employ the shmem helpers should + * use this function as their &drm_gem_object_funcs.print_info handler. + */ +static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent, + const struct drm_gem_object *obj) +{ + drm_gem_shmem_print_info(p, indent, obj); +} + +/** + * drm_gem_shmem_object_pin - GEM object function for drm_gem_shmem_pin() + * @obj: GEM object + * + * This function wraps drm_gem_shmem_pin(). Drivers that employ the shmem helpers should + * use it as their &drm_gem_object_funcs.pin handler. + */ +static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj) +{ + return drm_gem_shmem_pin(obj); +} + +/** + * drm_gem_shmem_object_unpin - GEM object function for drm_gem_shmem_unpin() + * @obj: GEM object + * + * This function wraps drm_gem_shmem_unpin(). Drivers that employ the shmem helpers should + * use it as their &drm_gem_object_funcs.unpin handler. + */ +static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj) +{ + drm_gem_shmem_unpin(obj); +} + +/** + * drm_gem_shmem_object_get_sg_table - GEM object function for drm_gem_shmem_get_sg_table() + * @obj: GEM object + * + * This function wraps drm_gem_shmem_get_sg_table(). Drivers that employ the shmem helpers should + * use it as their &drm_gem_object_funcs.get_sg_table handler. + * + * Returns: + * A pointer to the scatter/gather table of pinned pages or NULL on failure. + */ +static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj) +{ + return drm_gem_shmem_get_sg_table(obj); +} + +/* + * drm_gem_shmem_object_vmap - GEM object function for drm_gem_shmem_vmap() + * @obj: GEM object + * @map: Returns the kernel virtual address of the SHMEM GEM object's backing store. + * + * This function wraps drm_gem_shmem_vmap(). Drivers that employ the shmem helpers should + * use it as their &drm_gem_object_funcs.vmap handler. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) +{ + return drm_gem_shmem_vmap(obj, map); +} + +/* + * drm_gem_shmem_object_vunmap - GEM object function for drm_gem_shmem_vunmap() + * @obj: GEM object + * @map: Kernel virtual address where the SHMEM GEM object was mapped + * + * This function wraps drm_gem_shmem_vunmap(). Drivers that employ the shmem helpers should + * use it as their &drm_gem_object_funcs.vunmap handler. + */ +static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) +{ + drm_gem_shmem_vunmap(obj, map); +} + +/** + * drm_gem_shmem_object_mmap - GEM object function for drm_gem_shmem_mmap() + * @obj: GEM object + * @vma: VMA for the area to be mapped + * + * This function wraps drm_gem_shmem_mmap(). Drivers that employ the shmem helpers should + * use it as their &drm_gem_object_funcs.mmap handler. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + return drm_gem_shmem_mmap(obj, vma); +} + +/* + * Driver ops + */ + struct drm_gem_object * drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, -- cgit v1.2.3 From a193f3b4e050e35c506a34d0870c838d8e0b0449 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 8 Nov 2021 10:31:49 +0100 Subject: drm/shmem-helper: Pass GEM shmem object in public interfaces Change all GEM SHMEM object functions that receive a GEM object of type struct drm_gem_object to expect an object of type struct drm_gem_shmem_object instead. This change reduces the number of upcasts from struct drm_gem_object by moving them into callers. The C compiler can now verify that the GEM SHMEM functions are called with the correct type. For consistency, the patch also renames drm_gem_shmem_free_object to drm_gem_shmem_free. It further updates documentation for a number of functions. v3: * fix docs for drm_gem_shmem_object_free() v2: * mention _object_ callbacks in docs (Daniel) Signed-off-by: Thomas Zimmermann Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20211108093149.7226-4-tzimmermann@suse.de --- include/drm/drm_gem_shmem_helper.h | 69 ++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 4199877ae588..311d66c9cf4b 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -107,16 +107,17 @@ struct drm_gem_shmem_object { container_of(obj, struct drm_gem_shmem_object, base) struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size); -void drm_gem_shmem_free_object(struct drm_gem_object *obj); +void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem); int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); -int drm_gem_shmem_pin(struct drm_gem_object *obj); -void drm_gem_shmem_unpin(struct drm_gem_object *obj); -int drm_gem_shmem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); -void drm_gem_shmem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); +int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem); +void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem); +int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); +void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); +int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma); -int drm_gem_shmem_madvise(struct drm_gem_object *obj, int madv); +int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv); static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) { @@ -125,33 +126,31 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem !shmem->base.dma_buf && !shmem->base.import_attach; } -void drm_gem_shmem_purge_locked(struct drm_gem_object *obj); -bool drm_gem_shmem_purge(struct drm_gem_object *obj); +void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem); +bool drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem); -int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, - struct drm_mode_create_dumb *args); - -int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); - -void drm_gem_shmem_print_info(struct drm_printer *p, unsigned int indent, - const struct drm_gem_object *obj); +struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem); +struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem); -struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj); +void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, + struct drm_printer *p, unsigned int indent); /* * GEM object functions */ /** - * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free_object() + * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free() * @obj: GEM object to free * - * This function wraps drm_gem_shmem_free_object(). Drivers that employ the shmem helpers + * This function wraps drm_gem_shmem_free(). Drivers that employ the shmem helpers * should use it as their &drm_gem_object_funcs.free handler. */ static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj) { - drm_gem_shmem_free_object(obj); + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + drm_gem_shmem_free(shmem); } /** @@ -166,7 +165,9 @@ static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj) static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *obj) { - drm_gem_shmem_print_info(p, indent, obj); + const struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + drm_gem_shmem_print_info(shmem, p, indent); } /** @@ -178,7 +179,9 @@ static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsign */ static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj) { - return drm_gem_shmem_pin(obj); + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + return drm_gem_shmem_pin(shmem); } /** @@ -190,7 +193,9 @@ static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj) */ static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj) { - drm_gem_shmem_unpin(obj); + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + drm_gem_shmem_unpin(shmem); } /** @@ -205,7 +210,9 @@ static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj) */ static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj) { - return drm_gem_shmem_get_sg_table(obj); + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + return drm_gem_shmem_get_sg_table(shmem); } /* @@ -221,7 +228,9 @@ static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_ */ static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) { - return drm_gem_shmem_vmap(obj, map); + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + return drm_gem_shmem_vmap(shmem, map); } /* @@ -234,7 +243,9 @@ static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct d */ static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) { - drm_gem_shmem_vunmap(obj, map); + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + drm_gem_shmem_vunmap(shmem, map); } /** @@ -250,7 +261,9 @@ static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struc */ static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { - return drm_gem_shmem_mmap(obj, vma); + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + return drm_gem_shmem_mmap(shmem, vma); } /* @@ -261,8 +274,8 @@ struct drm_gem_object * drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); - -struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj); +int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, + struct drm_mode_create_dumb *args); /** * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations -- cgit v1.2.3 From 507805b83ff108473dba9d4909e41abd50cf07f5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Nov 2021 15:47:42 +0200 Subject: gpiolib: acpi: Remove never used devm_acpi_dev_remove_driver_gpios() Remove never used devm_acpi_dev_remove_driver_gpios(). Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg --- include/linux/gpio/consumer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 97a28ad3393b..3ad67b4a72be 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -690,7 +690,6 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev); int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios); -void devm_acpi_dev_remove_driver_gpios(struct device *dev); struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label); @@ -708,7 +707,6 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, { return -ENXIO; } -static inline void devm_acpi_dev_remove_driver_gpios(struct device *dev) {} #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */ -- cgit v1.2.3 From 10a2308ffb8cf262e473eb324fde42ae31b6da04 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 12 Nov 2021 18:16:34 +0800 Subject: net: Clean up some inconsistent indenting Eliminate the follow smatch warning: ./include/linux/skbuff.h:4229 skb_remcsum_process() warn: inconsistent indenting. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 686a666d073d..c8cb7e697d47 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4226,7 +4226,7 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, return; } - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { + if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { __skb_checksum_complete(skb); skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); } -- cgit v1.2.3 From 168eed447129899611098219b70ef97b605bc6e1 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 2 Nov 2021 12:10:17 +0200 Subject: ASoC: SOF: IPC: Add new IPC command to free trace DMA Add a new SOF_IPC_TRACE_DMA_FREE IPC command to stop and free trace DMA in the FW. Signed-off-by: Ranjani Sridharan Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20211102101019.14037-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/header.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/sof/header.h b/include/sound/sof/header.h index 4c747c52e01b..b97a76bcb655 100644 --- a/include/sound/sof/header.h +++ b/include/sound/sof/header.h @@ -119,6 +119,7 @@ #define SOF_IPC_TRACE_DMA_POSITION SOF_CMD_TYPE(0x002) #define SOF_IPC_TRACE_DMA_PARAMS_EXT SOF_CMD_TYPE(0x003) #define SOF_IPC_TRACE_FILTER_UPDATE SOF_CMD_TYPE(0x004) /**< ABI3.17 */ +#define SOF_IPC_TRACE_DMA_FREE SOF_CMD_TYPE(0x005) /**< ABI3.20 */ /* debug */ #define SOF_IPC_DEBUG_MEM_USAGE SOF_CMD_TYPE(0x001) -- cgit v1.2.3 From 02d6fdecb9c38de19065f6bed8d5214556fd061d Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 4 Nov 2021 16:00:40 +0100 Subject: regmap: allow to define reg_update_bits for no bus configuration Some device requires a special handling for reg_update_bits and can't use the normal regmap read write logic. An example is when locking is handled by the device and rmw operations requires to do atomic operations. Allow to declare a dedicated function in regmap_config for reg_update_bits in no bus configuration. Signed-off-by: Ansuel Smith Link: https://lore.kernel.org/r/20211104150040.1260-1-ansuelsmth@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e3c9a25a853a..22652e5fbc38 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -290,6 +290,11 @@ typedef void (*regmap_unlock)(void *); * read operation on a bus such as SPI, I2C, etc. Most of the * devices do not need this. * @reg_write: Same as above for writing. + * @reg_update_bits: Optional callback that if filled will be used to perform + * all the update_bits(rmw) operation. Should only be provided + * if the function require special handling with lock and reg + * handling and the operation cannot be represented as a simple + * update_bits operation on a bus such as SPI, I2C, etc. * @fast_io: Register IO is fast. Use a spinlock instead of a mutex * to perform locking. This field is ignored if custom lock/unlock * functions are used (see fields lock/unlock of struct regmap_config). @@ -372,6 +377,8 @@ struct regmap_config { int (*reg_read)(void *context, unsigned int reg, unsigned int *val); int (*reg_write)(void *context, unsigned int reg, unsigned int val); + int (*reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val); bool fast_io; -- cgit v1.2.3 From 45971bdd8ca8b5a99a49f4db86737401c45e246f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 2 Nov 2021 16:02:02 -0600 Subject: spi: remove unused header file Commit 6acaadc852f1 ("spi: clps711x: Driver refactor") removed the only use of , but left the header file behind. This file is unused, delete it. Cc: Signed-off-by: Arnd Bergmann Cc: Signed-off-by: Mark Brown Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Jonathan Corbet Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211102220203.940290-9-corbet@lwn.net Signed-off-by: Mark Brown --- include/linux/platform_data/spi-clps711x.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 include/linux/platform_data/spi-clps711x.h (limited to 'include') diff --git a/include/linux/platform_data/spi-clps711x.h b/include/linux/platform_data/spi-clps711x.h deleted file mode 100644 index efaa596848c9..000000000000 --- a/include/linux/platform_data/spi-clps711x.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CLPS711X SPI bus driver definitions - * - * Copyright (C) 2012 Alexander Shiyan - */ - -#ifndef ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H -#define ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H - -/* Board specific platform_data */ -struct spi_clps711x_pdata { - int *chipselect; /* Array of GPIO-numbers */ - int num_chipselect; /* Total count of GPIOs */ -}; - -#endif -- cgit v1.2.3 From a0ddee65c527d877e798205c1391c6170e580c66 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 12 Nov 2021 16:07:49 +0200 Subject: printk: Remove printk.h inclusion in percpu.h After the commit 42a0bb3f7138 ("printk/nmi: generic solution for safe printk in NMI") the printk.h is not needed anymore in percpu.h. Moreover `make headerdep` complains (an excerpt) In file included from linux/printk.h, from linux/dynamic_debug.h:188 from linux/printk.h:559 <-- here from linux/percpu.h:9 from linux/idr.h:17 include/net/9p/client.h:13: warning: recursive header inclusion Yeah, it's not a root cause of this, but removing will help to reduce the noise. Fixes: 42a0bb3f7138 ("printk/nmi: generic solution for safe printk in NMI") Signed-off-by: Andy Shevchenko Acked-by: Dennis Zhou Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20211112140749.80042-1-andriy.shevchenko@linux.intel.com --- include/linux/percpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 5e76af742c80..4fa3000f9c22 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 13cae4a104d2b7205696229ba85d34cc035f8c84 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Mon, 15 Nov 2021 10:49:21 +0800 Subject: i2c: core: Allow 255 byte transfers for SMBus 3.x SMBus 3.0 increased the maximum block transfer size from 32 bytes to 255 bytes. We increase the size of struct i2c_smbus_data's block[] member. i2c_smbus_xfer() and i2c_smbus_xfer_emulated() now support 255 byte block operations, other block functions remain limited to 32 bytes for compatibility with existing callers. We allow adapters to indicate support for the larger size with I2C_FUNC_SMBUS_V3_BLOCK. Most emulated drivers should be able to use 255 byte blocks by replacing I2C_SMBUS_BLOCK_MAX with I2C_SMBUS_V3_BLOCK_MAX though some will have hardware limitations that need testing. Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- include/linux/i2c.h | 13 +++++++++++++ include/uapi/linux/i2c.h | 5 ++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 16119ac1aa97..353d6b4e7a53 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -52,6 +52,19 @@ typedef int (*i2c_slave_cb_t)(struct i2c_client *client, struct module; struct property_entry; +/* SMBus 3.0 extends the maximum block read/write size to 255 (from 32). + * The larger size is only supported by some drivers, indicated by + * the I2C_FUNC_SMBUS_V3_BLOCK functionality bit. + */ +#define I2C_SMBUS_V3_BLOCK_MAX 255 /* As specified in SMBus 3.0 standard */ + +/* Note compatibility definition in uapi header with 32 byte block */ +union i2c_smbus_data { + __u8 byte; + __u16 word; + __u8 block[I2C_SMBUS_V3_BLOCK_MAX + 1]; /* block[0] is used for length */ +}; + #if IS_ENABLED(CONFIG_I2C) /* Return the Frequency mode string based on the bus frequency */ const char *i2c_freq_mode_string(u32 bus_freq_hz); diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h index 92326ebde350..7b7d90b50cf0 100644 --- a/include/uapi/linux/i2c.h +++ b/include/uapi/linux/i2c.h @@ -108,6 +108,7 @@ struct i2c_msg { #define I2C_FUNC_SMBUS_READ_I2C_BLOCK 0x04000000 /* I2C-like block xfer */ #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK 0x08000000 /* w/ 1-byte reg. addr. */ #define I2C_FUNC_SMBUS_HOST_NOTIFY 0x10000000 /* SMBus 2.0 or later */ +#define I2C_FUNC_SMBUS_V3_BLOCK 0x20000000 /* Device supports 255 byte block */ #define I2C_FUNC_SMBUS_BYTE (I2C_FUNC_SMBUS_READ_BYTE | \ I2C_FUNC_SMBUS_WRITE_BYTE) @@ -137,13 +138,15 @@ struct i2c_msg { /* * Data for SMBus Messages */ -#define I2C_SMBUS_BLOCK_MAX 32 /* As specified in SMBus standard */ +#define I2C_SMBUS_BLOCK_MAX 32 /* As specified in SMBus 2.0 standard */ +#ifndef __KERNEL__ union i2c_smbus_data { __u8 byte; __u16 word; __u8 block[I2C_SMBUS_BLOCK_MAX + 2]; /* block[0] is used for length */ /* and one more for user-space compatibility */ }; +#endif /* i2c_smbus_xfer read or write markers */ #define I2C_SMBUS_READ 1 -- cgit v1.2.3 From 84a107e68b34217eff536e81a6a6f419ee0d0f7e Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Mon, 15 Nov 2021 10:49:22 +0800 Subject: i2c: dev: Handle 255 byte blocks for i2c ioctl I2C_SMBUS is limited to 32 bytes due to compatibility with the 32 byte i2c_smbus_data.block I2C_RDWR allows larger transfers if sufficient sized buffers are passed. Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- include/uapi/linux/i2c-dev.h | 2 ++ include/uapi/linux/i2c.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/i2c-dev.h b/include/uapi/linux/i2c-dev.h index 1c4cec4ddd84..46ce31d42f7d 100644 --- a/include/uapi/linux/i2c-dev.h +++ b/include/uapi/linux/i2c-dev.h @@ -39,12 +39,14 @@ /* This is the structure as used in the I2C_SMBUS ioctl call */ +#ifndef __KERNEL__ struct i2c_smbus_ioctl_data { __u8 read_write; __u8 command; __u32 size; union i2c_smbus_data __user *data; }; +#endif /* This is the structure as used in the I2C_RDWR ioctl call */ struct i2c_rdwr_ioctl_data { diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h index 7b7d90b50cf0..c3534ab1ae53 100644 --- a/include/uapi/linux/i2c.h +++ b/include/uapi/linux/i2c.h @@ -109,6 +109,8 @@ struct i2c_msg { #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK 0x08000000 /* w/ 1-byte reg. addr. */ #define I2C_FUNC_SMBUS_HOST_NOTIFY 0x10000000 /* SMBus 2.0 or later */ #define I2C_FUNC_SMBUS_V3_BLOCK 0x20000000 /* Device supports 255 byte block */ + /* Note that I2C_SMBUS ioctl only */ + /* supports a 32 byte block */ #define I2C_FUNC_SMBUS_BYTE (I2C_FUNC_SMBUS_READ_BYTE | \ I2C_FUNC_SMBUS_WRITE_BYTE) -- cgit v1.2.3 From 34ae2c09d46a2d0abd907e139b466f798e4095a8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 15 Nov 2021 10:00:27 +0000 Subject: net: phylink: add generic validate implementation Add a generic validate() implementation using the supported_interfaces and a bitmask of MAC pause/speed/duplex capabilities. This allows us to entirely eliminate many driver private validate() implementations. We expose the underlying phylink_get_linkmodes() function so that drivers which have special needs can still benefit from conversion. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/phylink.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index f037470b6fb3..3563820a1765 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -20,6 +20,29 @@ enum { MLO_AN_PHY = 0, /* Conventional PHY */ MLO_AN_FIXED, /* Fixed-link mode */ MLO_AN_INBAND, /* In-band protocol */ + + MAC_SYM_PAUSE = BIT(0), + MAC_ASYM_PAUSE = BIT(1), + MAC_10HD = BIT(2), + MAC_10FD = BIT(3), + MAC_10 = MAC_10HD | MAC_10FD, + MAC_100HD = BIT(4), + MAC_100FD = BIT(5), + MAC_100 = MAC_100HD | MAC_100FD, + MAC_1000HD = BIT(6), + MAC_1000FD = BIT(7), + MAC_1000 = MAC_1000HD | MAC_1000FD, + MAC_2500FD = BIT(8), + MAC_5000FD = BIT(9), + MAC_10000FD = BIT(10), + MAC_20000FD = BIT(11), + MAC_25000FD = BIT(12), + MAC_40000FD = BIT(13), + MAC_50000FD = BIT(14), + MAC_56000FD = BIT(15), + MAC_100000FD = BIT(16), + MAC_200000FD = BIT(17), + MAC_400000FD = BIT(18), }; static inline bool phylink_autoneg_inband(unsigned int mode) @@ -69,6 +92,7 @@ enum phylink_op_type { * if MAC link is at %MLO_AN_FIXED mode. * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx * are supported by the MAC/PCS. + * @mac_capabilities: MAC pause/speed/duplex capabilities. */ struct phylink_config { struct device *dev; @@ -79,6 +103,7 @@ struct phylink_config { void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); DECLARE_PHY_INTERFACE_MASK(supported_interfaces); + unsigned long mac_capabilities; }; /** @@ -442,6 +467,12 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); #endif +void phylink_get_linkmodes(unsigned long *linkmodes, phy_interface_t interface, + unsigned long mac_capabilities); +void phylink_generic_validate(struct phylink_config *config, + unsigned long *supported, + struct phylink_link_state *state); + struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); -- cgit v1.2.3 From 2f6a470d6545841cf1891b87e360d3998ef024c8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 15 Nov 2021 07:49:46 -0800 Subject: Revert "Merge branch 'mctp-i2c-driver'" This reverts commit 71812af7234f30362b43ccff33f93890ae4c0655, reversing changes made to cc0be1ad686fb29a4d127948486f40b17fb34b50. Wolfram Sang says: Please revert. Besides the driver in net, it modifies the I2C core code. This has not been acked by the I2C maintainer (in this case me). So, please don't pull this in via the net tree. The question raised here (extending SMBus calls to 255 byte) is complicated because we need ABI backwards compatibility. Link: https://lore.kernel.org/all/YZJ9H4eM%2FM7OXVN0@shikoro/ Signed-off-by: Jakub Kicinski --- include/linux/i2c.h | 13 ------------- include/uapi/linux/i2c-dev.h | 2 -- include/uapi/linux/i2c.h | 7 +------ 3 files changed, 1 insertion(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 353d6b4e7a53..16119ac1aa97 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -52,19 +52,6 @@ typedef int (*i2c_slave_cb_t)(struct i2c_client *client, struct module; struct property_entry; -/* SMBus 3.0 extends the maximum block read/write size to 255 (from 32). - * The larger size is only supported by some drivers, indicated by - * the I2C_FUNC_SMBUS_V3_BLOCK functionality bit. - */ -#define I2C_SMBUS_V3_BLOCK_MAX 255 /* As specified in SMBus 3.0 standard */ - -/* Note compatibility definition in uapi header with 32 byte block */ -union i2c_smbus_data { - __u8 byte; - __u16 word; - __u8 block[I2C_SMBUS_V3_BLOCK_MAX + 1]; /* block[0] is used for length */ -}; - #if IS_ENABLED(CONFIG_I2C) /* Return the Frequency mode string based on the bus frequency */ const char *i2c_freq_mode_string(u32 bus_freq_hz); diff --git a/include/uapi/linux/i2c-dev.h b/include/uapi/linux/i2c-dev.h index 46ce31d42f7d..1c4cec4ddd84 100644 --- a/include/uapi/linux/i2c-dev.h +++ b/include/uapi/linux/i2c-dev.h @@ -39,14 +39,12 @@ /* This is the structure as used in the I2C_SMBUS ioctl call */ -#ifndef __KERNEL__ struct i2c_smbus_ioctl_data { __u8 read_write; __u8 command; __u32 size; union i2c_smbus_data __user *data; }; -#endif /* This is the structure as used in the I2C_RDWR ioctl call */ struct i2c_rdwr_ioctl_data { diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h index c3534ab1ae53..92326ebde350 100644 --- a/include/uapi/linux/i2c.h +++ b/include/uapi/linux/i2c.h @@ -108,9 +108,6 @@ struct i2c_msg { #define I2C_FUNC_SMBUS_READ_I2C_BLOCK 0x04000000 /* I2C-like block xfer */ #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK 0x08000000 /* w/ 1-byte reg. addr. */ #define I2C_FUNC_SMBUS_HOST_NOTIFY 0x10000000 /* SMBus 2.0 or later */ -#define I2C_FUNC_SMBUS_V3_BLOCK 0x20000000 /* Device supports 255 byte block */ - /* Note that I2C_SMBUS ioctl only */ - /* supports a 32 byte block */ #define I2C_FUNC_SMBUS_BYTE (I2C_FUNC_SMBUS_READ_BYTE | \ I2C_FUNC_SMBUS_WRITE_BYTE) @@ -140,15 +137,13 @@ struct i2c_msg { /* * Data for SMBus Messages */ -#define I2C_SMBUS_BLOCK_MAX 32 /* As specified in SMBus 2.0 standard */ -#ifndef __KERNEL__ +#define I2C_SMBUS_BLOCK_MAX 32 /* As specified in SMBus standard */ union i2c_smbus_data { __u8 byte; __u16 word; __u8 block[I2C_SMBUS_BLOCK_MAX + 2]; /* block[0] is used for length */ /* and one more for user-space compatibility */ }; -#endif /* i2c_smbus_xfer read or write markers */ #define I2C_SMBUS_READ 1 -- cgit v1.2.3 From ce6838afc9244171cd07620bbb82e18695c491e9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 15 Nov 2021 13:53:12 +0200 Subject: agp/intel-gtt: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Signed-off-by: Jani Nikula Acked-by: Daniel Vetter Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/4d6a976459547407979f4b4c05a52785523e6bd8.1636977089.git.jani.nikula@intel.com --- include/drm/intel-gtt.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index abfefaaf897a..4e5f8e7e25d0 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -6,7 +6,10 @@ #include #include -#include +#include + +struct pci_dev; +struct sg_table; void intel_gtt_get(u64 *gtt_total, phys_addr_t *mappable_base, -- cgit v1.2.3 From 7e78153aef7f9efcb935487402151de31e0836ad Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 15 Nov 2021 13:53:13 +0200 Subject: agp/intel-gtt: reduce intel-gtt dependencies more Don't include stuff on behalf of users if they're not strictly necessary for the header. Cc: Andy Shevchenko Signed-off-by: Jani Nikula Acked-by: Daniel Vetter Acked-by: Andy Shevchenko Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/7bcaa1684587b9b008d3c41468fb40e63c54fbc7.1636977089.git.jani.nikula@intel.com --- include/drm/intel-gtt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 4e5f8e7e25d0..67530bfef129 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -4,10 +4,9 @@ #ifndef _DRM_INTEL_GTT_H #define _DRM_INTEL_GTT_H -#include -#include #include +struct agp_bridge_data; struct pci_dev; struct sg_table; -- cgit v1.2.3 From f64bd790b750dd281406964af40d16adfc88a074 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Oct 2021 12:51:32 -0700 Subject: ACPI: Keep sub-table parsing infrastructure available for modules The NFIT driver and now the CXL ACPI driver have both open-coded ACPI table parsing. Before another instance is added arrange for the core ACPI sub-table parsing to be optionally available to drivers via the CONFIG_ACPI_TABLE_LIB symbol. If no drivers select the symbol then the infrastructure reverts back to being tagged __init via the __init_or_acpilib annotation. For now, only tag the core sub-table routines and data that the CEDT parsing in the cxl_acpi driver would want to reuse, a CEDT parsing helper is added in a later change. Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Alison Schofield Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/163553709227.2509508.8215196520233473814.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/acpi.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 143ce7e0bee1..edfa3c8f3562 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -232,14 +232,22 @@ int acpi_locate_initial_tables (void); void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); + +#ifdef CONFIG_ACPI_TABLE_LIB +#define __init_or_acpilib +#define __initdata_or_acpilib +#else +#define __init_or_acpilib __init +#define __initdata_or_acpilib __initdata +#endif + int acpi_table_parse(char *id, acpi_tbl_table_handler handler); -int __init acpi_table_parse_entries(char *id, unsigned long table_size, - int entry_id, - acpi_tbl_entry_handler handler, - unsigned int max_entries); -int __init acpi_table_parse_entries_array(char *id, unsigned long table_size, - struct acpi_subtable_proc *proc, int proc_num, - unsigned int max_entries); +int __init_or_acpilib acpi_table_parse_entries(char *id, + unsigned long table_size, int entry_id, + acpi_tbl_entry_handler handler, unsigned int max_entries); +int __init_or_acpilib acpi_table_parse_entries_array(char *id, + unsigned long table_size, struct acpi_subtable_proc *proc, + int proc_num, unsigned int max_entries); int acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries); -- cgit v1.2.3 From ad2f63971e9655e3987db32dac85aa50658790eb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Oct 2021 12:51:37 -0700 Subject: ACPI: Teach ACPI table parsing about the CEDT header format The CEDT adds yet one more unique subtable header type where the length is a 16-bit value. Extend the subtable helpers to detect this scenario. Cc: "Rafael J. Wysocki" Cc: Len Brown Tested-by: Alison Schofield Reviewed-by: Alison Schofield Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/163553709742.2509508.5177761945441327574.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index edfa3c8f3562..6b7f181d51e2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -133,6 +133,7 @@ union acpi_subtable_headers { struct acpi_subtable_header common; struct acpi_hmat_structure hmat; struct acpi_prmt_module_header prmt; + struct acpi_cedt_header cedt; }; typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); -- cgit v1.2.3 From 2d03e46a4bad20191d07b83ec1242d5f002577be Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Oct 2021 12:51:42 -0700 Subject: ACPI: Add a context argument for table parsing handlers In preparation for drivers reusing the core table parsing infrastructure, arrange for handlers to take a context argument. This allows driver table parsing to wrap ACPI table entries in driver-specific data. The first consumer of this infrastructure is the CEDT parsing that happens in the cxl_acpi driver, add a conditional (CONFIG_ACPI_TABLE_LIB=y) export of a acpi_table_parse_cedt() helper for this case. Cc: "Rafael J. Wysocki" Cc: Len Brown Tested-by: Alison Schofield Reviewed-by: Alison Schofield Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/163553710257.2509508.14310494417463866020.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/acpi.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6b7f181d51e2..95f88108f664 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -141,6 +141,9 @@ typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, const unsigned long end); +typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, + void *arg, const unsigned long end); + /* Debugger support */ struct acpi_debugger_ops { @@ -217,6 +220,8 @@ static inline int acpi_debugger_notify_command_complete(void) struct acpi_subtable_proc { int id; acpi_tbl_entry_handler handler; + acpi_tbl_entry_handler_arg handler_arg; + void *arg; int count; }; @@ -235,9 +240,11 @@ void acpi_table_init_complete (void); int acpi_table_init (void); #ifdef CONFIG_ACPI_TABLE_LIB +#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) #define __init_or_acpilib #define __initdata_or_acpilib #else +#define EXPORT_SYMBOL_ACPI_LIB(x) #define __init_or_acpilib __init #define __initdata_or_acpilib __initdata #endif @@ -252,6 +259,10 @@ int __init_or_acpilib acpi_table_parse_entries_array(char *id, int acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries); +int __init_or_acpilib +acpi_table_parse_cedt(enum acpi_cedt_type id, + acpi_tbl_entry_handler_arg handler_arg, void *arg); + int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); -- cgit v1.2.3 From 03b122da74b22fbe7cd98184fa5657a9ce13970c Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 26 Oct 2021 15:00:48 -0700 Subject: x86/sgx: Hook arch_memory_failure() into mainline code Add a call inside memory_failure() to call the arch specific code to check if the address is an SGX EPC page and handle it. Note the SGX EPC pages do not have a "struct page" entry, so the hook goes in at the same point as the device mapping hook. Pull the call to acquire the mutex earlier so the SGX errors are also protected. Make set_mce_nospec() skip SGX pages when trying to adjust the 1:1 map. Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Reviewed-by: Naoya Horiguchi Tested-by: Reinette Chatre Link: https://lkml.kernel.org/r/20211026220050.697075-6-tony.luck@intel.com --- include/linux/mm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a7e4a9e7d807..57f1aa2a33b6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3231,6 +3231,19 @@ extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); +#ifndef arch_memory_failure +static inline int arch_memory_failure(unsigned long pfn, int flags) +{ + return -ENXIO; +} +#endif + +#ifndef arch_is_platform_page +static inline bool arch_is_platform_page(u64 paddr) +{ + return false; +} +#endif /* * Error handlers for various types of pages. -- cgit v1.2.3 From 749303055b78bc38ec0790ccc596cae235446367 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Mon, 15 Nov 2021 12:02:15 +0000 Subject: firmware: cs_dsp: tidy includes in cs_dsp.c and cs_dsp.h This patch removes unused included header files and moves others into cs_dsp.h to ensure that types referenced in the header file are properly described to prevent compiler warnings. Signed-off-by: Simon Trimmer Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20211115120215.56824-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 9ad9eaaaa552..3a54b1afc48f 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -11,6 +11,11 @@ #ifndef __CS_DSP_H #define __CS_DSP_H +#include +#include +#include +#include + #define CS_ADSP2_REGION_0 BIT(0) #define CS_ADSP2_REGION_1 BIT(1) #define CS_ADSP2_REGION_2 BIT(2) -- cgit v1.2.3 From e9380df851878cee71df5a1c7611584421527f7e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 31 Oct 2021 20:48:52 -0500 Subject: ACPI: Add stubs for wakeup handler functions The commit ddfd9dcf270c ("ACPI: PM: Add acpi_[un]register_wakeup_handler()") added new functions for drivers to use during the s2idle wakeup path, but didn't add stubs for when CONFIG_ACPI wasn't set. Add those stubs in for other drivers to be able to use. Fixes: ddfd9dcf270c ("ACPI: PM: Add acpi_[un]register_wakeup_handler()") Acked-by: Rafael J. Wysocki Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20211101014853.6177-1-mario.limonciello@amd.com Signed-off-by: Linus Walleij --- include/linux/acpi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 143ce7e0bee1..668d007f0917 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -974,6 +974,15 @@ static inline int acpi_get_local_address(acpi_handle handle, u32 *addr) return -ENODEV; } +static inline int acpi_register_wakeup_handler(int wake_irq, + bool (*wakeup)(void *context), void *context) +{ + return -ENXIO; +} + +static inline void acpi_unregister_wakeup_handler( + bool (*wakeup)(void *context), void *context) { } + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC -- cgit v1.2.3 From a3143f7822a9eeb38f0e046080ae8f79f6c7122d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 2 Nov 2021 16:01:58 -0600 Subject: Remove unused header Commit 6a80b30086b8 ("fmc: Delete the FMC subsystem") removed the last user of , but left the header file behind. Nothing uses this file, delete it now. Cc: Linus Walleij Cc: Alessandro Rubini Signed-off-by: Jonathan Corbet Acked-by: Alessandro Rubini Link: https://lore.kernel.org/r/20211102220203.940290-5-corbet@lwn.net Signed-off-by: Linus Walleij --- include/linux/sdb.h | 160 ---------------------------------------------------- 1 file changed, 160 deletions(-) delete mode 100644 include/linux/sdb.h (limited to 'include') diff --git a/include/linux/sdb.h b/include/linux/sdb.h deleted file mode 100644 index a2404a2bbd10..000000000000 --- a/include/linux/sdb.h +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is the official version 1.1 of sdb.h - */ -#ifndef __SDB_H__ -#define __SDB_H__ -#ifdef __KERNEL__ -#include -#else -#include -#endif - -/* - * All structures are 64 bytes long and are expected - * to live in an array, one for each interconnect. - * Most fields of the structures are shared among the - * various types, and most-specific fields are at the - * beginning (for alignment reasons, and to keep the - * magic number at the head of the interconnect record - */ - -/* Product, 40 bytes at offset 24, 8-byte aligned - * - * device_id is vendor-assigned; version is device-specific, - * date is hex (e.g 0x20120501), name is UTF-8, blank-filled - * and not terminated with a 0 byte. - */ -struct sdb_product { - uint64_t vendor_id; /* 0x18..0x1f */ - uint32_t device_id; /* 0x20..0x23 */ - uint32_t version; /* 0x24..0x27 */ - uint32_t date; /* 0x28..0x2b */ - uint8_t name[19]; /* 0x2c..0x3e */ - uint8_t record_type; /* 0x3f */ -}; - -/* - * Component, 56 bytes at offset 8, 8-byte aligned - * - * The address range is first to last, inclusive - * (for example 0x100000 - 0x10ffff) - */ -struct sdb_component { - uint64_t addr_first; /* 0x08..0x0f */ - uint64_t addr_last; /* 0x10..0x17 */ - struct sdb_product product; /* 0x18..0x3f */ -}; - -/* Type of the SDB record */ -enum sdb_record_type { - sdb_type_interconnect = 0x00, - sdb_type_device = 0x01, - sdb_type_bridge = 0x02, - sdb_type_integration = 0x80, - sdb_type_repo_url = 0x81, - sdb_type_synthesis = 0x82, - sdb_type_empty = 0xFF, -}; - -/* Type 0: interconnect (first of the array) - * - * sdb_records is the length of the table including this first - * record, version is 1. The bus type is enumerated later. - */ -#define SDB_MAGIC 0x5344422d /* "SDB-" */ -struct sdb_interconnect { - uint32_t sdb_magic; /* 0x00-0x03 */ - uint16_t sdb_records; /* 0x04-0x05 */ - uint8_t sdb_version; /* 0x06 */ - uint8_t sdb_bus_type; /* 0x07 */ - struct sdb_component sdb_component; /* 0x08-0x3f */ -}; - -/* Type 1: device - * - * class is 0 for "custom device", other values are - * to be standardized; ABI version is for the driver, - * bus-specific bits are defined by each bus (see below) - */ -struct sdb_device { - uint16_t abi_class; /* 0x00-0x01 */ - uint8_t abi_ver_major; /* 0x02 */ - uint8_t abi_ver_minor; /* 0x03 */ - uint32_t bus_specific; /* 0x04-0x07 */ - struct sdb_component sdb_component; /* 0x08-0x3f */ -}; - -/* Type 2: bridge - * - * child is the address of the nested SDB table - */ -struct sdb_bridge { - uint64_t sdb_child; /* 0x00-0x07 */ - struct sdb_component sdb_component; /* 0x08-0x3f */ -}; - -/* Type 0x80: integration - * - * all types with bit 7 set are meta-information, so - * software can ignore the types it doesn't know. Here we - * just provide product information for an aggregate device - */ -struct sdb_integration { - uint8_t reserved[24]; /* 0x00-0x17 */ - struct sdb_product product; /* 0x08-0x3f */ -}; - -/* Type 0x81: Top module repository url - * - * again, an informative field that software can ignore - */ -struct sdb_repo_url { - uint8_t repo_url[63]; /* 0x00-0x3e */ - uint8_t record_type; /* 0x3f */ -}; - -/* Type 0x82: Synthesis tool information - * - * this informative record - */ -struct sdb_synthesis { - uint8_t syn_name[16]; /* 0x00-0x0f */ - uint8_t commit_id[16]; /* 0x10-0x1f */ - uint8_t tool_name[8]; /* 0x20-0x27 */ - uint32_t tool_version; /* 0x28-0x2b */ - uint32_t date; /* 0x2c-0x2f */ - uint8_t user_name[15]; /* 0x30-0x3e */ - uint8_t record_type; /* 0x3f */ -}; - -/* Type 0xff: empty - * - * this allows keeping empty slots during development, - * so they can be filled later with minimal efforts and - * no misleading description is ever shipped -- hopefully. - * It can also be used to pad a table to a desired length. - */ -struct sdb_empty { - uint8_t reserved[63]; /* 0x00-0x3e */ - uint8_t record_type; /* 0x3f */ -}; - -/* The type of bus, for bus-specific flags */ -enum sdb_bus_type { - sdb_wishbone = 0x00, - sdb_data = 0x01, -}; - -#define SDB_WB_WIDTH_MASK 0x0f -#define SDB_WB_ACCESS8 0x01 -#define SDB_WB_ACCESS16 0x02 -#define SDB_WB_ACCESS32 0x04 -#define SDB_WB_ACCESS64 0x08 -#define SDB_WB_LITTLE_ENDIAN 0x80 - -#define SDB_DATA_READ 0x04 -#define SDB_DATA_WRITE 0x02 -#define SDB_DATA_EXEC 0x01 - -#endif /* __SDB_H__ */ -- cgit v1.2.3 From 353050be4c19e102178ccc05988101887c25ae53 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Nov 2021 18:48:08 +0000 Subject: bpf: Fix toctou on read-only map's constant scalar tracking Commit a23740ec43ba ("bpf: Track contents of read-only maps as scalars") is checking whether maps are read-only both from BPF program side and user space side, and then, given their content is constant, reading out their data via map->ops->map_direct_value_addr() which is then subsequently used as known scalar value for the register, that is, it is marked as __mark_reg_known() with the read value at verification time. Before a23740ec43ba, the register content was marked as an unknown scalar so the verifier could not make any assumptions about the map content. The current implementation however is prone to a TOCTOU race, meaning, the value read as known scalar for the register is not guaranteed to be exactly the same at a later point when the program is executed, and as such, the prior made assumptions of the verifier with regards to the program will be invalid which can cause issues such as OOB access, etc. While the BPF_F_RDONLY_PROG map flag is always fixed and required to be specified at map creation time, the map->frozen property is initially set to false for the map given the map value needs to be populated, e.g. for global data sections. Once complete, the loader "freezes" the map from user space such that no subsequent updates/deletes are possible anymore. For the rest of the lifetime of the map, this freeze one-time trigger cannot be undone anymore after a successful BPF_MAP_FREEZE cmd return. Meaning, any new BPF_* cmd calls which would update/delete map entries will be rejected with -EPERM since map_get_sys_perms() removes the FMODE_CAN_WRITE permission. This also means that pending update/delete map entries must still complete before this guarantee is given. This corner case is not an issue for loaders since they create and prepare such program private map in successive steps. However, a malicious user is able to trigger this TOCTOU race in two different ways: i) via userfaultfd, and ii) via batched updates. For i) userfaultfd is used to expand the competition interval, so that map_update_elem() can modify the contents of the map after map_freeze() and bpf_prog_load() were executed. This works, because userfaultfd halts the parallel thread which triggered a map_update_elem() at the time where we copy key/value from the user buffer and this already passed the FMODE_CAN_WRITE capability test given at that time the map was not "frozen". Then, the main thread performs the map_freeze() and bpf_prog_load(), and once that had completed successfully, the other thread is woken up to complete the pending map_update_elem() which then changes the map content. For ii) the idea of the batched update is similar, meaning, when there are a large number of updates to be processed, it can increase the competition interval between the two. It is therefore possible in practice to modify the contents of the map after executing map_freeze() and bpf_prog_load(). One way to fix both i) and ii) at the same time is to expand the use of the map's map->writecnt. The latter was introduced in fc9702273e2e ("bpf: Add mmap() support for BPF_MAP_TYPE_ARRAY") and further refined in 1f6cb19be2e2 ("bpf: Prevent re-mmap()'ing BPF map as writable for initially r/o mapping") with the rationale to make a writable mmap()'ing of a map mutually exclusive with read-only freezing. The counter indicates writable mmap() mappings and then prevents/fails the freeze operation. Its semantics can be expanded beyond just mmap() by generally indicating ongoing write phases. This would essentially span any parallel regular and batched flavor of update/delete operation and then also have map_freeze() fail with -EBUSY. For the check_mem_access() in the verifier we expand upon the bpf_map_is_rdonly() check ensuring that all last pending writes have completed via bpf_map_write_active() test. Once the map->frozen is set and bpf_map_write_active() indicates a map->writecnt of 0 only then we are really guaranteed to use the map's data as known constants. For map->frozen being set and pending writes in process of still being completed we fall back to marking that register as unknown scalar so we don't end up making assumptions about it. With this, both TOCTOU reproducers from i) and ii) are fixed. Note that the map->writecnt has been converted into a atomic64 in the fix in order to avoid a double freeze_mutex mutex_{un,}lock() pair when updating map->writecnt in the various map update/delete BPF_* cmd flavors. Spanning the freeze_mutex over entire map update/delete operations in syscall side would not be possible due to then causing everything to be serialized. Similarly, something like synchronize_rcu() after setting map->frozen to wait for update/deletes to complete is not possible either since it would also have to span the user copy which can sleep. On the libbpf side, this won't break d66562fba1ce ("libbpf: Add BPF object skeleton support") as the anonymous mmap()-ed "map initialization image" is remapped as a BPF map-backed mmap()-ed memory where for .rodata it's non-writable. Fixes: a23740ec43ba ("bpf: Track contents of read-only maps as scalars") Reported-by: w1tcher.bupt@gmail.com Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f715e8863f4d..e7a163a3146b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -193,7 +193,7 @@ struct bpf_map { atomic64_t usercnt; struct work_struct work; struct mutex freeze_mutex; - u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */ + atomic64_t writecnt; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -1419,6 +1419,7 @@ void bpf_map_put(struct bpf_map *map); void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); +bool bpf_map_write_active(const struct bpf_map *map); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, -- cgit v1.2.3 From 7206998f578d5553989bc01ea2e544b622e79539 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Nov 2021 08:24:59 +0100 Subject: ALSA: hda: Fix potential deadlock at codec unbinding When a codec is unbound dynamically via sysfs while its stream is in use, we may face a potential deadlock at the proc remove or a UAF. This happens since the hda_pcm is managed by a linked list, as it handles the hda_pcm object release via kref. When a PCM is opened at the unbinding time, the release of hda_pcm gets delayed and it ends up with the close of the PCM stream releasing the associated hda_pcm object of its own. The hda_pcm destructor contains the PCM device release that includes the removal of procfs entries. And, this removal has the sync of the close of all in-use files -- which would never finish because it's called from the PCM file descriptor itself, i.e. it's trying to shoot its foot. For addressing the deadlock above, this patch changes the way to manage and release the hda_pcm object. The kref of hda_pcm is dropped, and instead a simple refcount is introduced in hda_codec for keeping the track of the active PCM streams, and at each PCM open and close, this refcount is adjusted accordingly. At unbinding, the driver calls snd_device_disconnect() for each PCM stream, then synchronizes with the refcount finish, and finally releases the object resources. Fixes: bbbc7e8502c9 ("ALSA: hda - Allocate hda_pcm objects dynamically") Link: https://lore.kernel.org/r/20211116072459.18930-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/hda_codec.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 0e45963bb767..82d9daa17851 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -8,7 +8,7 @@ #ifndef __SOUND_HDA_CODEC_H #define __SOUND_HDA_CODEC_H -#include +#include #include #include #include @@ -166,8 +166,8 @@ struct hda_pcm { bool own_chmap; /* codec driver provides own channel maps */ /* private: */ struct hda_codec *codec; - struct kref kref; struct list_head list; + unsigned int disconnected:1; }; /* codec information */ @@ -187,6 +187,8 @@ struct hda_codec { /* PCM to create, set by patch_ops.build_pcms callback */ struct list_head pcm_list_head; + refcount_t pcm_ref; + wait_queue_head_t remove_sleep; /* codec specific info */ void *spec; @@ -420,7 +422,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec); static inline void snd_hda_codec_pcm_get(struct hda_pcm *pcm) { - kref_get(&pcm->kref); + refcount_inc(&pcm->codec->pcm_ref); } void snd_hda_codec_pcm_put(struct hda_pcm *pcm); -- cgit v1.2.3 From 2c95b92ecd92e784785b1db8cccc4f0f2bfa850c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Nov 2021 08:33:58 +0100 Subject: ALSA: memalloc: Unify x86 SG-buffer handling (take#3) This is a second attempt to unify the x86-specific SG-buffer handling code with the new standard non-contiguous page handler. The first try (in commit 2d9ea39917a4) failed due to the wrong page and address calculations, hence reverted. (And the second try failed due to a copy&paste error.) Now it's corrected with the previous fix for noncontig pages, and the proper sg page iteration by this patch. After the migration, SNDRV_DMA_TYPE_DMA_SG becomes identical with SNDRV_DMA_TYPE_NONCONTIG on x86, while others still fall back to SNDRV_DMA_TYPE_DEV. Tested-by: Alex Xu (Hello71) Tested-by: Harald Arnesen Link: https://lore.kernel.org/r/20211017074859.24112-4-tiwai@suse.de Link: https://lore.kernel.org/r/20211109062235.22310-1-tiwai@suse.de Link: https://lore.kernel.org/r/20211116073358.19741-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/memalloc.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h index 1051b84e8579..653dfffb3ac8 100644 --- a/include/sound/memalloc.h +++ b/include/sound/memalloc.h @@ -36,13 +36,6 @@ struct snd_dma_device { #define SNDRV_DMA_TYPE_CONTINUOUS 1 /* continuous no-DMA memory */ #define SNDRV_DMA_TYPE_DEV 2 /* generic device continuous */ #define SNDRV_DMA_TYPE_DEV_WC 5 /* continuous write-combined */ -#ifdef CONFIG_SND_DMA_SGBUF -#define SNDRV_DMA_TYPE_DEV_SG 3 /* generic device SG-buffer */ -#define SNDRV_DMA_TYPE_DEV_WC_SG 6 /* SG write-combined */ -#else -#define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ -#define SNDRV_DMA_TYPE_DEV_WC_SG SNDRV_DMA_TYPE_DEV_WC -#endif #ifdef CONFIG_GENERIC_ALLOCATOR #define SNDRV_DMA_TYPE_DEV_IRAM 4 /* generic device iram-buffer */ #else @@ -51,6 +44,13 @@ struct snd_dma_device { #define SNDRV_DMA_TYPE_VMALLOC 7 /* vmalloc'ed buffer */ #define SNDRV_DMA_TYPE_NONCONTIG 8 /* non-coherent SG buffer */ #define SNDRV_DMA_TYPE_NONCOHERENT 9 /* non-coherent buffer */ +#ifdef CONFIG_SND_DMA_SGBUF +#define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_NONCONTIG +#define SNDRV_DMA_TYPE_DEV_WC_SG 6 /* SG write-combined */ +#else +#define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ +#define SNDRV_DMA_TYPE_DEV_WC_SG SNDRV_DMA_TYPE_DEV_WC +#endif /* * info for buffer allocation -- cgit v1.2.3 From 0f0ac158d28ff78e75c334e869b1cb8e69372a1f Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Sun, 24 Oct 2021 16:37:05 +1300 Subject: platform/x86: asus-wmi: Add support for custom fan curves Add support for custom fan curves found on some ASUS ROG laptops. These laptops have the ability to set a custom curve for the CPU and GPU fans via two ACPI methods. This patch adds two pwm attributes to the hwmon sysfs, pwm1 for CPU fan, pwm2 for GPU fan. Both are under the hwmon of the name `asus_custom_fan_curve`. There is no safety check of the set fan curves - this must be done in userspace. The fans have settings [1,2,3] under pwm_enable: 1. Enable and write settings out 2. Disable and use factory fan mode 3. Same as 2, additionally restoring default factory curve. Use of 2 means that the curve the user has set is still stored and won't be erased, but the laptop will be using its default auto-fan mode. Re-enabling the manual mode then activates the curves again. Notes: - pwm_enable = 0 is an invalid setting. - pwm is actually a percentage and is scaled on writing to device. Signed-off-by: Luke D. Jones Link: https://lore.kernel.org/r/20211024033705.5595-2-luke@ljones.dev Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/asus-wmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 17dc5cb6f3f2..a571b47ff362 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -77,6 +77,8 @@ #define ASUS_WMI_DEVID_THERMAL_CTRL 0x00110011 #define ASUS_WMI_DEVID_FAN_CTRL 0x00110012 /* deprecated */ #define ASUS_WMI_DEVID_CPU_FAN_CTRL 0x00110013 +#define ASUS_WMI_DEVID_CPU_FAN_CURVE 0x00110024 +#define ASUS_WMI_DEVID_GPU_FAN_CURVE 0x00110025 /* Power */ #define ASUS_WMI_DEVID_PROCESSOR_STATE 0x00120012 -- cgit v1.2.3 From 38543b72fbe52b7eec0dedd420d80a06c652d8e4 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 28 Oct 2021 02:22:41 +0200 Subject: platform/surface: aggregator: Make client device removal more generic Currently, there are similar functions defined in the Aggregator Registry and the controller core. Make client device removal more generic and export it. We can then use this function later on to remove client devices from device hubs as well as the controller and avoid re-defining similar things. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20211028002243.1586083-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/device.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index f636c5310321..cc257097eb05 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -319,6 +319,15 @@ void ssam_device_driver_unregister(struct ssam_device_driver *d); ssam_device_driver_unregister) +/* -- Helpers for controller and hub devices. ------------------------------- */ + +#ifdef CONFIG_SURFACE_AGGREGATOR_BUS +void ssam_remove_clients(struct device *dev); +#else /* CONFIG_SURFACE_AGGREGATOR_BUS */ +static inline void ssam_remove_clients(struct device *dev) {} +#endif /* CONFIG_SURFACE_AGGREGATOR_BUS */ + + /* -- Helpers for client-device requests. ----------------------------------- */ /** -- cgit v1.2.3 From b04cc0d912eb80d3c438b11d96ca847c3e77e8ab Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 25 Oct 2021 21:56:31 +0100 Subject: memory: renesas-rpc-if: Add support for RZ/G2L SPI Multi I/O Bus Controller on RZ/G2L SoC is almost identical to the RPC-IF interface found on R-Car Gen3 SoC's. This patch adds a new compatible string for the RZ/G2L family so that the timing values on RZ/G2L can be adjusted. Signed-off-by: Lad Prabhakar Reviewed-by: Biju Das Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/20211025205631.21151-8-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Krzysztof Kozlowski --- include/memory/renesas-rpc-if.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h index 77c694a19149..7c93f5177532 100644 --- a/include/memory/renesas-rpc-if.h +++ b/include/memory/renesas-rpc-if.h @@ -57,6 +57,11 @@ struct rpcif_op { } data; }; +enum rpcif_type { + RPCIF_RCAR_GEN3, + RPCIF_RZ_G2L, +}; + struct rpcif { struct device *dev; void __iomem *base; @@ -64,6 +69,7 @@ struct rpcif { struct regmap *regmap; struct reset_control *rstc; size_t size; + enum rpcif_type type; enum rpcif_data_dir dir; u8 bus_size; void *buffer; @@ -78,7 +84,7 @@ struct rpcif { }; int rpcif_sw_init(struct rpcif *rpc, struct device *dev); -void rpcif_hw_init(struct rpcif *rpc, bool hyperflash); +int rpcif_hw_init(struct rpcif *rpc, bool hyperflash); void rpcif_prepare(struct rpcif *rpc, const struct rpcif_op *op, u64 *offs, size_t *len); int rpcif_manual_xfer(struct rpcif *rpc); -- cgit v1.2.3 From ebf7f6f0a6cdcc17a3da52b81e4b3a98c4005028 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 5 Nov 2021 09:30:00 +0800 Subject: bpf: Change value of MAX_TAIL_CALL_CNT from 32 to 33 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the current code, the actual max tail call count is 33 which is greater than MAX_TAIL_CALL_CNT (defined as 32). The actual limit is not consistent with the meaning of MAX_TAIL_CALL_CNT and thus confusing at first glance. We can see the historical evolution from commit 04fd61ab36ec ("bpf: allow bpf programs to tail-call other bpf programs") and commit f9dabe016b63 ("bpf: Undo off-by-one in interpreter tail call count limit"). In order to avoid changing existing behavior, the actual limit is 33 now, this is reasonable. After commit 874be05f525e ("bpf, tests: Add tail call test suite"), we can see there exists failed testcase. On all archs when CONFIG_BPF_JIT_ALWAYS_ON is not set: # echo 0 > /proc/sys/net/core/bpf_jit_enable # modprobe test_bpf # dmesg | grep -w FAIL Tail call error path, max count reached jited:0 ret 34 != 33 FAIL On some archs: # echo 1 > /proc/sys/net/core/bpf_jit_enable # modprobe test_bpf # dmesg | grep -w FAIL Tail call error path, max count reached jited:1 ret 34 != 33 FAIL Although the above failed testcase has been fixed in commit 18935a72eb25 ("bpf/tests: Fix error in tail call limit tests"), it would still be good to change the value of MAX_TAIL_CALL_CNT from 32 to 33 to make the code more readable. The 32-bit x86 JIT was using a limit of 32, just fix the wrong comments and limit to 33 tail calls as the constant MAX_TAIL_CALL_CNT updated. For the mips64 JIT, use "ori" instead of "addiu" as suggested by Johan Almbladh. For the riscv JIT, use RV_REG_TCC directly to save one register move as suggested by Björn Töpel. For the other implementations, no function changes, it does not change the current limit 33, the new value of MAX_TAIL_CALL_CNT can reflect the actual max tail call count, the related tail call testcases in test_bpf module and selftests can work well for the interpreter and the JIT. Here are the test results on x86_64: # uname -m x86_64 # echo 0 > /proc/sys/net/core/bpf_jit_enable # modprobe test_bpf test_suite=test_tail_calls # dmesg | tail -1 test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [0/8 JIT'ed] # rmmod test_bpf # echo 1 > /proc/sys/net/core/bpf_jit_enable # modprobe test_bpf test_suite=test_tail_calls # dmesg | tail -1 test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [8/8 JIT'ed] # rmmod test_bpf # ./test_progs -t tailcalls #142 tailcalls:OK Summary: 1/11 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Tested-by: Johan Almbladh Tested-by: Ilya Leoshkevich Acked-by: Björn Töpel Acked-by: Johan Almbladh Acked-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/1636075800-3264-1-git-send-email-yangtiezhu@loongson.cn --- include/linux/bpf.h | 2 +- include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 56098c866704..cc7a0c36e7df 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1081,7 +1081,7 @@ struct bpf_array { }; #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ -#define MAX_TAIL_CALL_CNT 32 +#define MAX_TAIL_CALL_CNT 33 #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6297eafdc40f..a69e4b04ffeb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1744,7 +1744,7 @@ union bpf_attr { * if the maximum number of tail calls has been reached for this * chain of programs. This limit is defined in the kernel by the * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), - * which is currently set to 32. + * which is currently set to 33. * Return * 0 on success, or a negative error in case of failure. * -- cgit v1.2.3 From 283c6b54bca13313a4f437719f600a3ad2135847 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:31 -0800 Subject: tcp: remove dead code in __tcp_v6_send_check() For some reason, I forgot to change __tcp_v6_send_check() at the same time I removed (ip_summed == CHECKSUM_PARTIAL) check in __tcp_v4_send_check() Fixes: 98be9b12096f ("tcp: remove dead code after CHECKSUM_PARTIAL adoption") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_checksum.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index b3f4eaa88672..ea681910b7a3 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -65,15 +65,9 @@ static inline void __tcp_v6_send_check(struct sk_buff *skb, { struct tcphdr *th = tcp_hdr(skb); - if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v6_check(skb->len, saddr, daddr, - csum_partial(th, th->doff << 2, - skb->csum)); - } + th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); } static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb) -- cgit v1.2.3 From 42f67eea3ba36cef2dce2e853de6ddcb2e89eb39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:33 -0800 Subject: net: use sk_is_tcp() in more places Move sk_is_tcp() to include/net/sock.h and use it where we can. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skmsg.h | 6 ------ include/net/sock.h | 5 +++++ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 584d94be9c8b..18a717fe62eb 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -507,12 +507,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } -static inline bool sk_is_tcp(const struct sock *sk) -{ - return sk->sk_type == SOCK_STREAM && - sk->sk_protocol == IPPROTO_TCP; -} - static inline bool sk_is_udp(const struct sock *sk) { return sk->sk_type == SOCK_DGRAM && diff --git a/include/net/sock.h b/include/net/sock.h index b32906e1ab55..5bdeffdea5ec 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2638,6 +2638,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +static inline bool sk_is_tcp(const struct sock *sk) +{ + return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from -- cgit v1.2.3 From d0d598ca86bd9e595f16a39097707c90841afe80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:34 -0800 Subject: net: remove sk_route_forced_caps We were only using one bit, and we can replace it by sk_is_tcp() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 5bdeffdea5ec..ebad629dd9ed 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -285,8 +285,6 @@ struct bpf_local_storage; * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) - * @sk_route_forced_caps: static, forced route capabilities - * (set in tcp_init_sock()) * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments @@ -461,7 +459,6 @@ struct sock { struct page_frag sk_frag; netdev_features_t sk_route_caps; netdev_features_t sk_route_nocaps; - netdev_features_t sk_route_forced_caps; int sk_gso_type; unsigned int sk_gso_max_size; gfp_t sk_allocation; -- cgit v1.2.3 From aba546565b613e74b84b8261999ea82b5561d3f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:35 -0800 Subject: net: remove sk_route_nocaps Instead of using a full netdev_features_t, we can use a single bit, as sk_route_nocaps is only used to remove NETIF_F_GSO_MASK from sk->sk_route_cap. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ebad629dd9ed..985ddcd33504 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -284,7 +284,7 @@ struct bpf_local_storage; * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) - * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) + * @sk_gso_disabled: if set, NETIF_F_GSO_MASK is forbidden. * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments @@ -458,7 +458,6 @@ struct sock { unsigned long sk_max_pacing_rate; struct page_frag sk_frag; netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; int sk_gso_type; unsigned int sk_gso_max_size; gfp_t sk_allocation; @@ -468,7 +467,7 @@ struct sock { * Because of non atomicity rules, all * changes are protected by socket lock. */ - u8 sk_padding : 1, + u8 sk_gso_disabled : 1, sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, @@ -2121,10 +2120,10 @@ static inline bool sk_can_gso(const struct sock *sk) void sk_setup_caps(struct sock *sk, struct dst_entry *dst); -static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) +static inline void sk_gso_disable(struct sock *sk) { - sk->sk_route_nocaps |= flags; - sk->sk_route_caps &= ~flags; + sk->sk_gso_disabled = 1; + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, -- cgit v1.2.3 From 1b31debca83284486cd736757b5f26d51719ef80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:36 -0800 Subject: ipv6: shrink struct ipcm6_cookie gso_size can be moved after tclass, to use an existing hole. (8 bytes saved on 64bit arches) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c19bf51ded1d..53ac7707ca70 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -345,9 +345,9 @@ struct ipcm6_cookie { struct sockcm_cookie sockc; __s16 hlimit; __s16 tclass; + __u16 gso_size; __s8 dontfrag; struct ipv6_txoptions *opt; - __u16 gso_size; }; static inline void ipcm6_init(struct ipcm6_cookie *ipc6) -- cgit v1.2.3 From 1ace2b4d2b4e1db8fc62d872ab54ab48f6215ecd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:37 -0800 Subject: net: shrink struct sock by 8 bytes Move sk_bind_phc next to sk_peer_lock to fill a hole. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 985ddcd33504..2333ab081789 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -489,6 +489,7 @@ struct sock { u16 sk_busy_poll_budget; #endif spinlock_t sk_peer_lock; + int sk_bind_phc; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; @@ -498,7 +499,6 @@ struct sock { seqlock_t sk_stamp_seq; #endif u16 sk_tsflags; - int sk_bind_phc; u8 sk_shutdown; u32 sk_tskey; atomic_t sk_zckey; -- cgit v1.2.3 From 6c302e799a0d4be1362f505453b714fe05d91f2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:38 -0800 Subject: net: forward_alloc_get depends on CONFIG_MPTCP (struct proto)->sk_forward_alloc is currently only used by MPTCP. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2333ab081789..cb97c448472a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1206,7 +1206,9 @@ struct proto { unsigned int inuse_idx; #endif +#if IS_ENABLED(CONFIG_MPTCP) int (*forward_alloc_get)(const struct sock *sk); +#endif bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*sock_is_readable)(struct sock *sk); @@ -1295,10 +1297,11 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int static inline int sk_forward_alloc_get(const struct sock *sk) { - if (!sk->sk_prot->forward_alloc_get) - return sk->sk_forward_alloc; - - return sk->sk_prot->forward_alloc_get(sk); +#if IS_ENABLED(CONFIG_MPTCP) + if (sk->sk_prot->forward_alloc_get) + return sk->sk_prot->forward_alloc_get(sk); +#endif + return sk->sk_forward_alloc; } static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) -- cgit v1.2.3 From d2489c7b6d7d5ed4b32b56703c57c47bfbfe7fa5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:41 -0800 Subject: tcp: add RETPOLINE mitigation to sk_backlog_rcv Use INDIRECT_CALL_INET() to avoid an indirect call when/if CONFIG_RETPOLINE=y Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index cb97c448472a..2d40fe4c7718 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1018,12 +1018,18 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)); +INDIRECT_CALLABLE_DECLARE(int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)); + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { if (sk_memalloc_socks() && skb_pfmemalloc(skb)) return __sk_backlog_rcv(sk, skb); - return sk->sk_backlog_rcv(sk, skb); + return INDIRECT_CALL_INET(sk->sk_backlog_rcv, + tcp_v6_do_rcv, + tcp_v4_do_rcv, + sk, skb); } static inline void sk_incoming_cpu_update(struct sock *sk) -- cgit v1.2.3 From 0307a0b74b3af6ecb1c8b7f727376130b15bbf44 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:42 -0800 Subject: tcp: annotate data-races on tp->segs_in and tp->data_segs_in tcp_segs_in() can be called from BH, while socket spinlock is held but socket owned by user, eventually reading these fields from tcp_get_info() Found by code inspection, no need to backport this patch to older kernels. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4da22b41bde6..05c81677aaf7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2172,9 +2172,13 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) u16 segs_in; segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); - tp->segs_in += segs_in; + + /* We update these fields while other threads might + * read them from tcp_get_info() + */ + WRITE_ONCE(tp->segs_in, tp->segs_in + segs_in); if (skb->len > tcp_hdrlen(skb)) - tp->data_segs_in += segs_in; + WRITE_ONCE(tp->data_segs_in, tp->data_segs_in + segs_in); } /* -- cgit v1.2.3 From f35f821935d8df76f9c92e2431a225bdff938169 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:46 -0800 Subject: tcp: defer skb freeing after socket lock is released tcp recvmsg() (or rx zerocopy) spends a fair amount of time freeing skbs after their payload has been consumed. A typical ~64KB GRO packet has to release ~45 page references, eventually going to page allocator for each of them. Currently, this freeing is performed while socket lock is held, meaning that there is a high chance that BH handler has to queue incoming packets to tcp socket backlog. This can cause additional latencies, because the user thread has to process the backlog at release_sock() time, and while doing so, additional frames can be added by BH handler. This patch adds logic to defer these frees after socket lock is released, or directly from BH handler if possible. Being able to free these skbs from BH handler helps a lot, because this avoids the usual alloc/free assymetry, when BH handler and user thread do not run on same cpu or NUMA node. One cpu can now be fully utilized for the kernel->user copy, and another cpu is handling BH processing and skb/page allocs/frees (assuming RFS is not forcing use of a single CPU) Tested: 100Gbit NIC Max throughput for one TCP_STREAM flow, over 10 runs MTU : 1500 Before: 55 Gbit After: 66 Gbit MTU : 4096+(headers) Before: 82 Gbit After: 95 Gbit Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ include/net/sock.h | 3 +++ include/net/tcp.h | 10 ++++++++++ 3 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 686a666d073d..b8b806512e16 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #if IS_ENABLED(CONFIG_NF_CONNTRACK) @@ -743,6 +744,7 @@ struct sk_buff { }; struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ struct list_head list; + struct llist_node ll_node; }; union { diff --git a/include/net/sock.h b/include/net/sock.h index 2d40fe4c7718..2578d1f455a7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -408,6 +409,8 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; + struct llist_head defer_list; + #define sk_rmem_alloc sk_backlog.rmem_alloc int sk_forward_alloc; diff --git a/include/net/tcp.h b/include/net/tcp.h index 05c81677aaf7..44e442bf23f9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1368,6 +1368,16 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) } bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); + +void __sk_defer_free_flush(struct sock *sk); + +static inline void sk_defer_free_flush(struct sock *sk) +{ + if (llist_empty(&sk->defer_list)) + return; + __sk_defer_free_flush(sk); +} + int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); -- cgit v1.2.3 From 43f51df4172955971ef5498f09308a9dc0291766 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 11:02:49 -0800 Subject: net: move early demux fields close to sk_refcnt sk_rx_dst/sk_rx_dst_ifindex/sk_rx_dst_cookie are read in early demux, and currently spans two cache lines. Moving them close to sk_refcnt makes more sense, as only one cache line is needed. New layout for this hot cache line is : struct sock { struct sock_common __sk_common; /* 0 0x88 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ struct dst_entry * sk_rx_dst; /* 0x88 0x8 */ int sk_rx_dst_ifindex; /* 0x90 0x4 */ u32 sk_rx_dst_cookie; /* 0x94 0x4 */ socket_lock_t sk_lock; /* 0x98 0x20 */ atomic_t sk_drops; /* 0xb8 0x4 */ int sk_rcvlowat; /* 0xbc 0x4 */ /* --- cacheline 3 boundary (192 bytes) --- */ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2578d1f455a7..95cc03bd3fac 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -390,6 +390,11 @@ struct sock { #define sk_flags __sk_common.skc_flags #define sk_rxhash __sk_common.skc_rxhash + /* early demux fields */ + struct dst_entry *sk_rx_dst; + int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; + socket_lock_t sk_lock; atomic_t sk_drops; int sk_rcvlowat; @@ -432,9 +437,6 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry *sk_rx_dst; - int sk_rx_dst_ifindex; - u32 sk_rx_dst_cookie; struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; -- cgit v1.2.3 From 4721031c3559db8eae61df305f10c00099a7c1d0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:05:51 -0800 Subject: net: move gro definitions to include/net/gro.h include/linux/netdevice.h became too big, move gro stuff into include/net/gro.h Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 348 --------------------------------------- include/net/gro.h | 396 ++++++++++++++++++++++++++++++++++++++++++++- include/net/ip.h | 8 - include/net/ip6_checksum.h | 8 - include/net/udp.h | 24 --- 5 files changed, 394 insertions(+), 390 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ec42495a43a..d95c9839ce90 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2520,109 +2520,6 @@ static inline void netif_napi_del(struct napi_struct *napi) synchronize_net(); } -struct napi_gro_cb { - /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ - void *frag0; - - /* Length of frag0. */ - unsigned int frag0_len; - - /* This indicates where we are processing relative to skb->data. */ - int data_offset; - - /* This is non-zero if the packet cannot be merged with the new skb. */ - u16 flush; - - /* Save the IP ID here and check when we get to the transport layer */ - u16 flush_id; - - /* Number of segments aggregated. */ - u16 count; - - /* Start offset for remote checksum offload */ - u16 gro_remcsum_start; - - /* jiffies when first packet was created/queued */ - unsigned long age; - - /* Used in ipv6_gro_receive() and foo-over-udp */ - u16 proto; - - /* This is non-zero if the packet may be of the same flow. */ - u8 same_flow:1; - - /* Used in tunnel GRO receive */ - u8 encap_mark:1; - - /* GRO checksum is valid */ - u8 csum_valid:1; - - /* Number of checksums via CHECKSUM_UNNECESSARY */ - u8 csum_cnt:3; - - /* Free the skb? */ - u8 free:2; -#define NAPI_GRO_FREE 1 -#define NAPI_GRO_FREE_STOLEN_HEAD 2 - - /* Used in foo-over-udp, set in udp[46]_gro_receive */ - u8 is_ipv6:1; - - /* Used in GRE, set in fou/gue_gro_receive */ - u8 is_fou:1; - - /* Used to determine if flush_id can be ignored */ - u8 is_atomic:1; - - /* Number of gro_receive callbacks this packet already went through */ - u8 recursion_counter:4; - - /* GRO is done by frag_list pointer chaining. */ - u8 is_flist:1; - - /* used to support CHECKSUM_COMPLETE for tunneling protocols */ - __wsum csum; - - /* used in skb_gro_receive() slow path */ - struct sk_buff *last; -}; - -#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) - -#define GRO_RECURSION_LIMIT 15 -static inline int gro_recursion_inc_test(struct sk_buff *skb) -{ - return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; -} - -typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); -static inline struct sk_buff *call_gro_receive(gro_receive_t cb, - struct list_head *head, - struct sk_buff *skb) -{ - if (unlikely(gro_recursion_inc_test(skb))) { - NAPI_GRO_CB(skb)->flush |= 1; - return NULL; - } - - return cb(head, skb); -} - -typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, - struct sk_buff *); -static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, - struct sock *sk, - struct list_head *head, - struct sk_buff *skb) -{ - if (unlikely(gro_recursion_inc_test(skb))) { - NAPI_GRO_CB(skb)->flush |= 1; - return NULL; - } - - return cb(sk, head, skb); -} - struct packet_type { __be16 type; /* This is really htons(ether_type). */ bool ignore_outgoing; @@ -3008,251 +2905,6 @@ int dev_restart(struct net_device *dev); int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); -static inline unsigned int skb_gro_offset(const struct sk_buff *skb) -{ - return NAPI_GRO_CB(skb)->data_offset; -} - -static inline unsigned int skb_gro_len(const struct sk_buff *skb) -{ - return skb->len - NAPI_GRO_CB(skb)->data_offset; -} - -static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) -{ - NAPI_GRO_CB(skb)->data_offset += len; -} - -static inline void *skb_gro_header_fast(struct sk_buff *skb, - unsigned int offset) -{ - return NAPI_GRO_CB(skb)->frag0 + offset; -} - -static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) -{ - return NAPI_GRO_CB(skb)->frag0_len < hlen; -} - -static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) -{ - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; -} - -static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, - unsigned int offset) -{ - if (!pskb_may_pull(skb, hlen)) - return NULL; - - skb_gro_frag0_invalidate(skb); - return skb->data + offset; -} - -static inline void *skb_gro_network_header(struct sk_buff *skb) -{ - return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + - skb_network_offset(skb); -} - -static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) -{ - if (NAPI_GRO_CB(skb)->csum_valid) - NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, - csum_partial(start, len, 0)); -} - -/* GRO checksum functions. These are logical equivalents of the normal - * checksum functions (in skbuff.h) except that they operate on the GRO - * offsets and fields in sk_buff. - */ - -__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); - -static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) -{ - return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); -} - -static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, - bool zero_okay, - __sum16 check) -{ - return ((skb->ip_summed != CHECKSUM_PARTIAL || - skb_checksum_start_offset(skb) < - skb_gro_offset(skb)) && - !skb_at_gro_remcsum_start(skb) && - NAPI_GRO_CB(skb)->csum_cnt == 0 && - (!zero_okay || check)); -} - -static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, - __wsum psum) -{ - if (NAPI_GRO_CB(skb)->csum_valid && - !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) - return 0; - - NAPI_GRO_CB(skb)->csum = psum; - - return __skb_gro_checksum_complete(skb); -} - -static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) -{ - if (NAPI_GRO_CB(skb)->csum_cnt > 0) { - /* Consume a checksum from CHECKSUM_UNNECESSARY */ - NAPI_GRO_CB(skb)->csum_cnt--; - } else { - /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we - * verified a new top level checksum or an encapsulated one - * during GRO. This saves work if we fallback to normal path. - */ - __skb_incr_checksum_unnecessary(skb); - } -} - -#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ - compute_pseudo) \ -({ \ - __sum16 __ret = 0; \ - if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ - __ret = __skb_gro_checksum_validate_complete(skb, \ - compute_pseudo(skb, proto)); \ - if (!__ret) \ - skb_gro_incr_csum_unnecessary(skb); \ - __ret; \ -}) - -#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ - __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) - -#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ - compute_pseudo) \ - __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) - -#define skb_gro_checksum_simple_validate(skb) \ - __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) - -static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) -{ - return (NAPI_GRO_CB(skb)->csum_cnt == 0 && - !NAPI_GRO_CB(skb)->csum_valid); -} - -static inline void __skb_gro_checksum_convert(struct sk_buff *skb, - __wsum pseudo) -{ - NAPI_GRO_CB(skb)->csum = ~pseudo; - NAPI_GRO_CB(skb)->csum_valid = 1; -} - -#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ -do { \ - if (__skb_gro_checksum_convert_check(skb)) \ - __skb_gro_checksum_convert(skb, \ - compute_pseudo(skb, proto)); \ -} while (0) - -struct gro_remcsum { - int offset; - __wsum delta; -}; - -static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) -{ - grc->offset = 0; - grc->delta = 0; -} - -static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, - unsigned int off, size_t hdrlen, - int start, int offset, - struct gro_remcsum *grc, - bool nopartial) -{ - __wsum delta; - size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - - BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); - - if (!nopartial) { - NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; - return ptr; - } - - ptr = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, off + plen)) { - ptr = skb_gro_header_slow(skb, off + plen, off); - if (!ptr) - return NULL; - } - - delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, - start, offset); - - /* Adjust skb->csum since we changed the packet */ - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); - - grc->offset = off + hdrlen + offset; - grc->delta = delta; - - return ptr; -} - -static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, - struct gro_remcsum *grc) -{ - void *ptr; - size_t plen = grc->offset + sizeof(u16); - - if (!grc->delta) - return; - - ptr = skb_gro_header_fast(skb, grc->offset); - if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) { - ptr = skb_gro_header_slow(skb, plen, grc->offset); - if (!ptr) - return; - } - - remcsum_unadjust((__sum16 *)ptr, grc->delta); -} - -#ifdef CONFIG_XFRM_OFFLOAD -static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) -{ - if (PTR_ERR(pp) != -EINPROGRESS) - NAPI_GRO_CB(skb)->flush |= flush; -} -static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, - struct sk_buff *pp, - int flush, - struct gro_remcsum *grc) -{ - if (PTR_ERR(pp) != -EINPROGRESS) { - NAPI_GRO_CB(skb)->flush |= flush; - skb_gro_remcsum_cleanup(skb, grc); - skb->remcsum_offload = 0; - } -} -#else -static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) -{ - NAPI_GRO_CB(skb)->flush |= flush; -} -static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, - struct sk_buff *pp, - int flush, - struct gro_remcsum *grc) -{ - NAPI_GRO_CB(skb)->flush |= flush; - skb_gro_remcsum_cleanup(skb, grc); - skb->remcsum_offload = 0; -} -#endif static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, diff --git a/include/net/gro.h b/include/net/gro.h index 01edaf3fdda0..1ffbe74b2e35 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -4,9 +4,367 @@ #define _NET_IPV6_GRO_H #include +#include +#include +#include +#include -struct list_head; -struct sk_buff; +struct napi_gro_cb { + /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ + void *frag0; + + /* Length of frag0. */ + unsigned int frag0_len; + + /* This indicates where we are processing relative to skb->data. */ + int data_offset; + + /* This is non-zero if the packet cannot be merged with the new skb. */ + u16 flush; + + /* Save the IP ID here and check when we get to the transport layer */ + u16 flush_id; + + /* Number of segments aggregated. */ + u16 count; + + /* Start offset for remote checksum offload */ + u16 gro_remcsum_start; + + /* jiffies when first packet was created/queued */ + unsigned long age; + + /* Used in ipv6_gro_receive() and foo-over-udp */ + u16 proto; + + /* This is non-zero if the packet may be of the same flow. */ + u8 same_flow:1; + + /* Used in tunnel GRO receive */ + u8 encap_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; + + /* Free the skb? */ + u8 free:2; +#define NAPI_GRO_FREE 1 +#define NAPI_GRO_FREE_STOLEN_HEAD 2 + + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; + + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; + + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* GRO is done by frag_list pointer chaining. */ + u8 is_flist:1; + + /* used to support CHECKSUM_COMPLETE for tunneling protocols */ + __wsum csum; + + /* used in skb_gro_receive() slow path */ + struct sk_buff *last; +}; + +#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) + +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); +static inline struct sk_buff *call_gro_receive(gro_receive_t cb, + struct list_head *head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + +typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, + struct sk_buff *); +static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, + struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(sk, head, skb); +} + +static inline unsigned int skb_gro_offset(const struct sk_buff *skb) +{ + return NAPI_GRO_CB(skb)->data_offset; +} + +static inline unsigned int skb_gro_len(const struct sk_buff *skb) +{ + return skb->len - NAPI_GRO_CB(skb)->data_offset; +} + +static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) +{ + NAPI_GRO_CB(skb)->data_offset += len; +} + +static inline void *skb_gro_header_fast(struct sk_buff *skb, + unsigned int offset) +{ + return NAPI_GRO_CB(skb)->frag0 + offset; +} + +static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +{ + return NAPI_GRO_CB(skb)->frag0_len < hlen; +} + +static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; +} + +static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) +{ + if (!pskb_may_pull(skb, hlen)) + return NULL; + + skb_gro_frag0_invalidate(skb); + return skb->data + offset; +} + +static inline void *skb_gro_network_header(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + + skb_network_offset(skb); +} + +static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), proto, 0); +} + +static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (NAPI_GRO_CB(skb)->csum_valid) + NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, + csum_partial(start, len, 0)); +} + +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. + */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); +} + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return ((skb->ip_summed != CHECKSUM_PARTIAL || + skb_checksum_start_offset(skb) < + skb_gro_offset(skb)) && + !skb_at_gro_remcsum_start(skb) && + NAPI_GRO_CB(skb)->csum_cnt == 0 && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->csum_cnt > 0) { + /* Consume a checksum from CHECKSUM_UNNECESSARY */ + NAPI_GRO_CB(skb)->csum_cnt--; + } else { + /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we + * verified a new top level checksum or an encapsulated one + * during GRO. This saves work if we fallback to normal path. + */ + __skb_incr_checksum_unnecessary(skb); + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (!__ret) \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + +static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid); +} + +static inline void __skb_gro_checksum_convert(struct sk_buff *skb, + __wsum pseudo) +{ + NAPI_GRO_CB(skb)->csum = ~pseudo; + NAPI_GRO_CB(skb)->csum_valid = 1; +} + +#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ +do { \ + if (__skb_gro_checksum_convert_check(skb)) \ + __skb_gro_checksum_convert(skb, \ + compute_pseudo(skb, proto)); \ +} while (0) + +struct gro_remcsum { + int offset; + __wsum delta; +}; + +static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) +{ + grc->offset = 0; + grc->delta = 0; +} + +static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, + unsigned int off, size_t hdrlen, + int start, int offset, + struct gro_remcsum *grc, + bool nopartial) +{ + __wsum delta; + size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); + + BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); + + if (!nopartial) { + NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; + return ptr; + } + + ptr = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, off + plen)) { + ptr = skb_gro_header_slow(skb, off + plen, off); + if (!ptr) + return NULL; + } + + delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, + start, offset); + + /* Adjust skb->csum since we changed the packet */ + NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + + grc->offset = off + hdrlen + offset; + grc->delta = delta; + + return ptr; +} + +static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, + struct gro_remcsum *grc) +{ + void *ptr; + size_t plen = grc->offset + sizeof(u16); + + if (!grc->delta) + return; + + ptr = skb_gro_header_fast(skb, grc->offset); + if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) { + ptr = skb_gro_header_slow(skb, plen, grc->offset); + if (!ptr) + return; + } + + remcsum_unadjust((__sum16 *)ptr, grc->delta); +} + +#ifdef CONFIG_XFRM_OFFLOAD +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) +{ + if (PTR_ERR(pp) != -EINPROGRESS) + NAPI_GRO_CB(skb)->flush |= flush; +} +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff *pp, + int flush, + struct gro_remcsum *grc) +{ + if (PTR_ERR(pp) != -EINPROGRESS) { + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; + } +} +#else +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) +{ + NAPI_GRO_CB(skb)->flush |= flush; +} +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff *pp, + int flush, + struct gro_remcsum *grc) +{ + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; +} +#endif INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, struct sk_buff *)); @@ -15,6 +373,14 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); + +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); + #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ ({ \ unlikely(gro_recursion_inc_test(skb)) ? \ @@ -22,4 +388,30 @@ INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ }) +struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, + struct udphdr *uh, struct sock *sk); +int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); + +static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) +{ + struct udphdr *uh; + unsigned int hlen, off; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*uh); + uh = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) + uh = skb_gro_header_slow(skb, hlen, off); + + return uh; +} + +static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + + return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), proto, 0)); +} + #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/ip.h b/include/net/ip.h index b71e88507c4a..7d1088888c10 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -568,14 +568,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } -static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) -{ - const struct iphdr *iph = skb_gro_network_header(skb); - - return csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), proto, 0); -} - /* * Map a multicast IP onto multicast MAC for type ethernet. */ diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index ea681910b7a3..c8a96b888277 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -43,14 +43,6 @@ static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) skb->len, proto, 0)); } -static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) -{ - const struct ipv6hdr *iph = skb_gro_network_header(skb); - - return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), proto, 0)); -} - static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, diff --git a/include/net/udp.h b/include/net/udp.h index 909ecf447e0f..f1c2a88c9005 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -167,36 +167,12 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); -struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, - struct udphdr *uh, struct sock *sk); -int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); - struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); -static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) -{ - struct udphdr *uh; - unsigned int hlen, off; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*uh); - uh = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) - uh = skb_gro_header_slow(skb, hlen, off); - - return uh; -} - /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { -- cgit v1.2.3 From 0b935d7f8c07bf0a192712bdbf76dbf45ef8b115 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:05:52 -0800 Subject: net: gro: move skb_gro_receive_list to udp_offload.c This helper is used once, no need to keep it in fat net/core/skbuff.c Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d95c9839ce90..ce6ee1453dbc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2903,7 +2903,6 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id); int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); -int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, -- cgit v1.2.3 From e456a18a390b96f22b0de2acd4d0f49c72ed2280 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:05:53 -0800 Subject: net: gro: move skb_gro_receive into net/core/gro.c net/core/gro.c will contain all core gro functions, to shrink net/core/skbuff.c and net/core/dev.c Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - include/net/gro.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce6ee1453dbc..93d397db9ec4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2902,7 +2902,6 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); -int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, diff --git a/include/net/gro.h b/include/net/gro.h index 1ffbe74b2e35..f988bf3440f8 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -414,4 +414,6 @@ static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) skb_gro_len(skb), proto, 0)); } +int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); + #endif /* _NET_IPV6_GRO_H */ -- cgit v1.2.3 From 587652bbdd06ab38a4c1b85e40f933d2cf4a1147 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:05:54 -0800 Subject: net: gro: populate net/core/gro.c Move gro code and data from net/core/dev.c to net/core/gro.c to ease maintenance. gro_normal_list() and gro_normal_one() are inlined because they are called from both files. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/gro.h | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93d397db9ec4..31a7e6b27681 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3657,6 +3657,7 @@ int netif_rx_ni(struct sk_buff *skb); int netif_rx_any_context(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); +void netif_receive_skb_list_internal(struct list_head *head); void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); diff --git a/include/net/gro.h b/include/net/gro.h index f988bf3440f8..d0e7df691a80 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -416,4 +416,26 @@ static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static inline void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. + */ +static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) +{ + list_add_tail(&skb->list, &napi->rx_list); + napi->rx_count += segs; + if (napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + + #endif /* _NET_IPV6_GRO_H */ -- cgit v1.2.3 From 2a12ae5d433df3d3c3f1a930799ec09cb2b8058f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:11:47 -0800 Subject: net: inline sock_prot_inuse_add() sock_prot_inuse_add() is very small, we can inline it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 95cc03bd3fac..5a1e1df3cefd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1429,13 +1429,21 @@ proto_memory_pressure(struct proto *prot) #ifdef CONFIG_PROC_FS +#define PROTO_INUSE_NR 64 /* should be enough for the first time */ +struct prot_inuse { + int val[PROTO_INUSE_NR]; +}; /* Called with local bh disabled */ -void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); +static inline void sock_prot_inuse_add(const struct net *net, + const struct proto *prot, int val) +{ + __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); +} int sock_prot_inuse_get(struct net *net, struct proto *proto); int sock_inuse_get(struct net *net); #else -static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, - int inc) +static inline void sock_prot_inuse_add(const struct net *net, + const struct proto *prot, int val) { } #endif -- cgit v1.2.3 From d477eb9004845cb2dc92ad5eed79a437738a868a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:11:48 -0800 Subject: net: make sock_inuse_add() available MPTCP hard codes it, let us instead provide this helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 5a1e1df3cefd..c4c981a51797 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1439,6 +1439,12 @@ static inline void sock_prot_inuse_add(const struct net *net, { __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); } + +static inline void sock_inuse_add(const struct net *net, int val) +{ + this_cpu_add(*net->core.sock_inuse, val); +} + int sock_prot_inuse_get(struct net *net, struct proto *proto); int sock_inuse_get(struct net *net); #else @@ -1446,6 +1452,10 @@ static inline void sock_prot_inuse_add(const struct net *net, const struct proto *prot, int val) { } + +static inline void sock_inuse_add(const struct net *net, int val) +{ +} #endif -- cgit v1.2.3 From 4199bae10c49e24bc2c5d8c06a68820d56640000 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:11:49 -0800 Subject: net: merge net->core.prot_inuse and net->core.sock_inuse net->core.sock_inuse is a per cpu variable (int), while net->core.prot_inuse is another per cpu variable of 64 integers. per cpu allocator tend to place them in very different places. Grouping them together makes sense, since it makes updates potentially faster, if hitting the same cache line. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/core.h | 1 - include/net/sock.h | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 36c2d998a43c..552bc25b1933 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -12,7 +12,6 @@ struct netns_core { int sysctl_somaxconn; #ifdef CONFIG_PROC_FS - int __percpu *sock_inuse; struct prot_inuse __percpu *prot_inuse; #endif }; diff --git a/include/net/sock.h b/include/net/sock.h index c4c981a51797..5589312531df 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1431,6 +1431,7 @@ proto_memory_pressure(struct proto *prot) #ifdef CONFIG_PROC_FS #define PROTO_INUSE_NR 64 /* should be enough for the first time */ struct prot_inuse { + int all; int val[PROTO_INUSE_NR]; }; /* Called with local bh disabled */ @@ -1442,7 +1443,7 @@ static inline void sock_prot_inuse_add(const struct net *net, static inline void sock_inuse_add(const struct net *net, int val) { - this_cpu_add(*net->core.sock_inuse, val); + this_cpu_add(net->core.prot_inuse->all, val); } int sock_prot_inuse_get(struct net *net, struct proto *proto); -- cgit v1.2.3 From b3cb764aa1d753cf6a58858f9e2097ba71e8100b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:11:50 -0800 Subject: net: drop nopreempt requirement on sock_prot_inuse_add() This is distracting really, let's make this simpler, because many callers had to take care of this by themselves, even if on x86 this adds more code than really needed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 5589312531df..f09c0c4736c4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1434,11 +1434,11 @@ struct prot_inuse { int all; int val[PROTO_INUSE_NR]; }; -/* Called with local bh disabled */ + static inline void sock_prot_inuse_add(const struct net *net, const struct proto *prot, int val) { - __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); + this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); } static inline void sock_inuse_add(const struct net *net, int val) -- cgit v1.2.3 From 2bd1b237616bd91a3f4f0cd94dc53cd6cba7aff9 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 11 Nov 2021 16:48:42 -0800 Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_DISCOVERABLE to use cmd_sync This makes MGMT_OP_SET_DISCOVERABLE use hci_cmd_sync_queue instead of use a dedicated discoverable_update work. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 -- include/net/bluetooth/hci_sync.h | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b5f061882c10..5f57ff81b7b8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -496,7 +496,6 @@ struct hci_dev { struct work_struct bg_scan_update; struct work_struct scan_update; struct work_struct connectable_update; - struct work_struct discoverable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -1828,7 +1827,6 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); -void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 00b13e8ca800..d335c0ce8c5d 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -90,6 +90,9 @@ int hci_dev_close_sync(struct hci_dev *hdev); int hci_powered_update_sync(struct hci_dev *hdev); int hci_set_powered_sync(struct hci_dev *hdev, u8 val); +int hci_update_discoverable_sync(struct hci_dev *hdev); +int hci_update_discoverable(struct hci_dev *hdev); + int hci_start_discovery_sync(struct hci_dev *hdev); int hci_stop_discovery_sync(struct hci_dev *hdev); -- cgit v1.2.3 From f056a65783cce9c1279c1635d92768ce5962e4d6 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 11 Nov 2021 16:48:43 -0800 Subject: Bluetooth: hci_sync: Convert MGMT_OP_SET_CONNECTABLE to use cmd_sync This makes MGMT_OP_SET_CONNEABLE use hci_cmd_sync_queue instead of use a dedicated connetable_update work. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 -- include/net/bluetooth/hci_sync.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5f57ff81b7b8..acb46ae27b5a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -495,7 +495,6 @@ struct hci_dev { struct work_struct discov_update; struct work_struct bg_scan_update; struct work_struct scan_update; - struct work_struct connectable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -1826,7 +1825,6 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u16 max_interval, u16 latency, u16 timeout); void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); -void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index d335c0ce8c5d..0336c1bc5d25 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -93,6 +93,8 @@ int hci_set_powered_sync(struct hci_dev *hdev, u8 val); int hci_update_discoverable_sync(struct hci_dev *hdev); int hci_update_discoverable(struct hci_dev *hdev); +int hci_update_connectable_sync(struct hci_dev *hdev); + int hci_start_discovery_sync(struct hci_dev *hdev); int hci_stop_discovery_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 9482c5074a7d0bec682cc35a12d687f185684f31 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 11 Nov 2021 16:48:44 -0800 Subject: Bluetooth: hci_request: Remove bg_scan_update work This work is no longer necessary since all the code using it has been converted to use hci_passive_scan/hci_passive_scan_sync. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index acb46ae27b5a..2560cfe80db8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -493,7 +493,6 @@ struct hci_dev { struct work_struct tx_work; struct work_struct discov_update; - struct work_struct bg_scan_update; struct work_struct scan_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; -- cgit v1.2.3 From 0f281a5e5b673698c246da5d7bf0d5fa427c47c5 Mon Sep 17 00:00:00 2001 From: Archie Pusaka Date: Thu, 11 Nov 2021 13:20:53 +0800 Subject: Bluetooth: Ignore HCI_ERROR_CANCELLED_BY_HOST on adv set terminated event This event is received when the controller stops advertising, specifically for these three reasons: (a) Connection is successfully created (success). (b) Timeout is reached (error). (c) Number of advertising events is reached (error). (*) This event is NOT generated when the host stops the advertisement. Refer to the BT spec ver 5.3 vol 4 part E sec 7.7.65.18. Note that the section was revised from BT spec ver 5.0 vol 2 part E sec 7.7.65.18 which was ambiguous about (*). Some chips (e.g. RTL8822CE) send this event when the host stops the advertisement with status = HCI_ERROR_CANCELLED_BY_HOST (due to (*) above). This is treated as an error and the advertisement will be removed and userspace will be informed via MGMT event. On suspend, we are supposed to temporarily disable advertisements, and continue advertising on resume. However, due to the behavior above, the advertisements are removed instead. This patch returns early if HCI_ERROR_CANCELLED_BY_HOST is received. Btmon snippet of the unexpected behavior: @ MGMT Command: Remove Advertising (0x003f) plen 1 Instance: 1 < HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 6 Extended advertising: Disabled (0x00) Number of sets: 1 (0x01) Entry 0 Handle: 0x01 Duration: 0 ms (0x00) Max ext adv events: 0 > HCI Event: LE Meta Event (0x3e) plen 6 LE Advertising Set Terminated (0x12) Status: Operation Cancelled by Host (0x44) Handle: 1 Connection handle: 0 Number of completed extended advertising events: 5 > HCI Event: Command Complete (0x0e) plen 4 LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2 Status: Success (0x00) Signed-off-by: Archie Pusaka Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 63065bc01b76..84db6b275231 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -566,6 +566,7 @@ enum { #define HCI_ERROR_INVALID_LL_PARAMS 0x1e #define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c +#define HCI_ERROR_CANCELLED_BY_HOST 0x44 /* Flow control modes */ #define HCI_FLOW_CTL_MODE_PACKET_BASED 0x00 -- cgit v1.2.3 From 77d641baa3c8e18a1056bec6c64c6103c1a17b1e Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Mon, 8 Nov 2021 17:27:05 +0100 Subject: power: supply: core: add POWER_SUPPLY_HEALTH_NO_BATTERY Some chargers can keep the system powered from the mains even when no battery is present. It this case none of the currently defined health statuses applies. Add a new status to report that no battery is present. Suggested-by: Sebastian Reichel Signed-off-by: Luca Ceresoli Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 9ca1f120a211..2d1318fe2455 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -66,6 +66,7 @@ enum { POWER_SUPPLY_HEALTH_WARM, POWER_SUPPLY_HEALTH_COOL, POWER_SUPPLY_HEALTH_HOT, + POWER_SUPPLY_HEALTH_NO_BATTERY, }; enum { -- cgit v1.2.3 From 451dc48c806a7ce9fbec5e7a24ccf4b2c936e834 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 11 Nov 2021 22:09:16 -0500 Subject: net: ieee802154: handle iftypes as u32 This patch fixes an issue that an u32 netlink value is handled as a signed enum value which doesn't fit into the range of u32 netlink type. If it's handled as -1 value some BIT() evaluation ends in a shift-out-of-bounds issue. To solve the issue we set the to u32 max which is s32 "-1" value to keep backwards compatibility and let the followed enum values start counting at 0. This brings the compiler to never handle the enum as signed and a check if the value is above NL802154_IFTYPE_MAX should filter -1 out. Fixes: f3ea5e44231a ("ieee802154: add new interface command") Signed-off-by: Alexander Aring Link: https://lore.kernel.org/r/20211112030916.685793-1-aahringo@redhat.com Signed-off-by: Stefan Schmidt --- include/net/nl802154.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index ddcee128f5d9..145acb8f2509 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -19,6 +19,8 @@ * */ +#include + #define NL802154_GENL_NAME "nl802154" enum nl802154_commands { @@ -150,10 +152,9 @@ enum nl802154_attrs { }; enum nl802154_iftype { - /* for backwards compatibility TODO */ - NL802154_IFTYPE_UNSPEC = -1, + NL802154_IFTYPE_UNSPEC = (~(__u32)0), - NL802154_IFTYPE_NODE, + NL802154_IFTYPE_NODE = 0, NL802154_IFTYPE_MONITOR, NL802154_IFTYPE_COORD, -- cgit v1.2.3 From 83dde7498fefeb920b1def317421262317d178e5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 7 Nov 2021 08:40:47 +0200 Subject: RDMA/netlink: Add __maybe_unused to static inline in C file Like other commits in the tree add __maybe_unused to a static inline in a C file because some clang compilers will complain about unused code: >> drivers/infiniband/core/nldev.c:2543:1: warning: unused function '__chk_RDMA_NL_NLDEV' MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5); ^ Fixes: e3bf14bdc17a ("rdma: Autoload netlink client modules") Link: https://lore.kernel.org/r/4a8101919b765e01d7fde6f27fd572c958deeb4a.1636267207.git.leonro@nvidia.com Reported-by: kernel test robot Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 2758d9df71ee..c2a79aeee113 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -30,7 +30,7 @@ enum rdma_nl_flags { * constant as well and the compiler checks they are the same. */ #define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ - static inline void __chk_##_index(void) \ + static inline void __maybe_unused __chk_##_index(void) \ { \ BUILD_BUG_ON(_index != _val); \ } \ -- cgit v1.2.3 From d7751d6476185ff754b9dad2cba0c0a6e43ecadc Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 20 May 2021 17:09:58 +0300 Subject: net/mlx5: E-Switch, Fix resetting of encap mode when entering switchdev E-Switch encap mode is relevant only when in switchdev mode. The RDMA driver can query the encap configuration via mlx5_eswitch_get_encap_mode(). Make sure it returns the currently used mode and not the set one. This reverts the cited commit which reset the encap mode on entering switchdev and fixes the original issue properly. Fixes: 9a64144d683a ("net/mlx5: E-Switch, Fix default encap mode") Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 97afcea39a7b..8b18fe9771f9 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -145,13 +145,13 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \ ESW_TUN_OPTS_OFFSET + 1) -u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); +u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ -static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) +static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) { return MLX5_ESWITCH_NONE; } -- cgit v1.2.3 From c2c60ea37e5b6be58c9dd7aff0b2e86ba0f18e0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:23:01 -0800 Subject: once: use __section(".data.once") .data.once contains nicely packed bool variables. It is used already by DO_ONCE_LITE(). Using it also in DO_ONCE() removes holes in .data section. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/once.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/once.h b/include/linux/once.h index d361fb14ac3a..f54523052bbc 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -38,7 +38,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key, #define DO_ONCE(func, ...) \ ({ \ bool ___ret = false; \ - static bool ___done = false; \ + static bool __section(".data.once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ unsigned long ___flags; \ -- cgit v1.2.3 From 7071732c26fe2cf141185ed16a8a85d02495ae8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:23:02 -0800 Subject: net: use .data.once section in netdev_level_once() Same rationale than prior patch : using the dedicated section avoid holes and pack all these bool values. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 31a7e6b27681..dd328364dfe9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4942,7 +4942,7 @@ void netdev_info(const struct net_device *dev, const char *format, ...); #define netdev_level_once(level, dev, fmt, ...) \ do { \ - static bool __print_once __read_mostly; \ + static bool __section(".data.once") __print_once; \ \ if (!__print_once) { \ __print_once = true; \ -- cgit v1.2.3 From 49ecc2e9c3abd269951972fa8b23a4d081111b80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2021 09:23:03 -0800 Subject: net: align static siphash keys siphash keys use 16 bytes. Define siphash_aligned_key_t macro so that we can make sure they are not crossing a cache line boundary. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/siphash.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/siphash.h b/include/linux/siphash.h index bf21591a9e5e..3f7427b9e935 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -21,6 +21,8 @@ typedef struct { u64 key[2]; } siphash_key_t; +#define siphash_aligned_key_t siphash_key_t __aligned(16) + static inline bool siphash_key_is_zero(const siphash_key_t *key) { return !(key->key[0] | key->key[1]); -- cgit v1.2.3 From b9241f54138ca5af4d3c5ca6db56be83d7491508 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 15 Nov 2021 17:11:17 +0000 Subject: net: document SMII and correct phylink's new validation mechanism SMII has not been documented in the kernel, but information on this PHY interface mode has been recently found. Document it, and correct the recently introduced phylink handling for this interface mode. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/E1mmfVl-0075nP-14@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 96e43fbb2dd8..1e57cdd95da3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -99,7 +99,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal RX delay * @PHY_INTERFACE_MODE_RTBI: Reduced TBI - * @PHY_INTERFACE_MODE_SMII: ??? MII + * @PHY_INTERFACE_MODE_SMII: Serial MII * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface * @PHY_INTERFACE_MODE_XLGMII:40 gigabit media-independent interface * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax -- cgit v1.2.3 From 5854d4a6cc356ba3e16d8593ac1c089a32d1759c Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 29 Oct 2021 20:26:12 +0300 Subject: mtd: spi-nor: Get rid of nor->page_size nor->page_size duplicated what nor->params->page_size indicates for no good reason. page_size is a flash parameter of fixed value and it is better suited to be found in nor->params->page_size. Signed-off-by: Tudor Ambarus Reviewed-by: Pratyush Yadav Reviewed-by: Michael Walle Link: https://lore.kernel.org/r/20211029172633.886453-5-tudor.ambarus@microchip.com --- include/linux/mtd/spi-nor.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index f67457748ed8..fc90fce26e33 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -371,7 +371,6 @@ struct spi_nor_flash_parameter; * @bouncebuf_size: size of the bounce buffer * @info: SPI NOR part JEDEC MFR ID and other info * @manufacturer: SPI NOR manufacturer - * @page_size: the page size of the SPI NOR * @addr_width: number of address bytes * @erase_opcode: the opcode for erasing a sector * @read_opcode: the read opcode @@ -401,7 +400,6 @@ struct spi_nor { size_t bouncebuf_size; const struct flash_info *info; const struct spi_nor_manufacturer *manufacturer; - u32 page_size; u8 addr_width; u8 erase_opcode; u8 read_opcode; -- cgit v1.2.3 From 1e2c3ef0496e72ba9001da5fd1b7ed56ccb30597 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Mon, 4 Oct 2021 16:11:52 +0200 Subject: tee: export teedev_open() and teedev_close_context() Exports the two functions teedev_open() and teedev_close_context() in order to make it easier to create a driver internal struct tee_context. Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index a1f03461369b..468a7d83dc6c 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -587,4 +587,18 @@ struct tee_client_driver { #define to_tee_client_driver(d) \ container_of(d, struct tee_client_driver, driver) +/** + * teedev_open() - Open a struct tee_device + * @teedev: Device to open + * + * @return a pointer to struct tee_context on success or an ERR_PTR on failure. + */ +struct tee_context *teedev_open(struct tee_device *teedev); + +/** + * teedev_close_context() - closes a struct tee_context + * @ctx: The struct tee_context to close + */ +void teedev_close_context(struct tee_context *ctx); + #endif /*__TEE_DRV_H*/ -- cgit v1.2.3 From 49c39ec4670a8f045729e3717af2e1a74caf89a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 13 Sep 2021 14:21:25 +0200 Subject: dma-buf: nuke dma_resv_get_excl_unlocked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Heureka, that's finally not used any more. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210917123513.1106-27-christian.koenig@amd.com --- include/linux/dma-resv.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include') diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 09c6063b199a..eebf04325b34 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -440,32 +440,6 @@ dma_resv_excl_fence(struct dma_resv *obj) return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); } -/** - * dma_resv_get_excl_unlocked - get the reservation object's - * exclusive fence, without lock held. - * @obj: the reservation object - * - * If there is an exclusive fence, this atomically increments it's - * reference count and returns it. - * - * RETURNS - * The exclusive fence or NULL if none - */ -static inline struct dma_fence * -dma_resv_get_excl_unlocked(struct dma_resv *obj) -{ - struct dma_fence *fence; - - if (!rcu_access_pointer(obj->fence_excl)) - return NULL; - - rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&obj->fence_excl); - rcu_read_unlock(); - - return fence; -} - /** * dma_resv_shared_list - get the reservation object's shared fence list * @obj: the reservation object -- cgit v1.2.3 From 2fb75e1b642f49253d8848c9e47e8942f5366221 Mon Sep 17 00:00:00 2001 From: Liu Xinpeng Date: Mon, 25 Oct 2021 11:46:26 +0800 Subject: psi: Add a missing SPDX license header Add the missing SPDX license header to include/linux/psi.h include/linux/psi_types.h kernel/sched/psi.c Signed-off-by: Liu Xinpeng Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/1635133586-84611-2-git-send-email-liuxp11@chinatelecom.cn --- include/linux/psi.h | 1 + include/linux/psi_types.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/psi.h b/include/linux/psi.h index 65eb1476ac70..a70ca833c6d7 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PSI_H #define _LINUX_PSI_H diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 0a23300d49af..bf50068d5d4b 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PSI_TYPES_H #define _LINUX_PSI_TYPES_H -- cgit v1.2.3 From 4feee7d12603deca8775f9f9ae5e121093837444 Mon Sep 17 00:00:00 2001 From: Josh Don Date: Mon, 18 Oct 2021 13:34:28 -0700 Subject: sched/core: Forced idle accounting Adds accounting for "forced idle" time, which is time where a cookie'd task forces its SMT sibling to idle, despite the presence of runnable tasks. Forced idle time is one means to measure the cost of enabling core scheduling (ie. the capacity lost due to the need to force idle). Forced idle time is attributed to the thread responsible for causing the forced idle. A few details: - Forced idle time is displayed via /proc/PID/sched. It also requires that schedstats is enabled. - Forced idle is only accounted when a sibling hyperthread is held idle despite the presence of runnable tasks. No time is charged if a sibling is idle but has no runnable tasks. - Tasks with 0 cookie are never charged forced idle. - For SMT > 2, we scale the amount of forced idle charged based on the number of forced idle siblings. Additionally, we split the time up and evenly charge it to all running tasks, as each is equally responsible for the forced idle. Signed-off-by: Josh Don Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211018203428.2025792-1-joshdon@google.com --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 78c351e35fec..d2e261adb8ea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -523,7 +523,11 @@ struct sched_statistics { u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; + +#ifdef CONFIG_SCHED_CORE + u64 core_forceidle_sum; #endif +#endif /* CONFIG_SCHEDSTATS */ } ____cacheline_aligned; struct sched_entity { -- cgit v1.2.3 From cb0e52b7748737b2cf6481fdd9b920ce7e1ebbdf Mon Sep 17 00:00:00 2001 From: Brian Chen Date: Wed, 10 Nov 2021 21:33:12 +0000 Subject: psi: Fix PSI_MEM_FULL state when tasks are in memstall and doing reclaim We've noticed cases where tasks in a cgroup are stalled on memory but there is little memory FULL pressure since tasks stay on the runqueue in reclaim. A simple example involves a single threaded program that keeps leaking and touching large amounts of memory. It runs in a cgroup with swap enabled, memory.high set at 10M and cpu.max ratio set at 5%. Though there is significant CPU pressure and memory SOME, there is barely any memory FULL since the task enters reclaim and stays on the runqueue. However, this memory-bound task is effectively stalled on memory and we expect memory FULL to match memory SOME in this scenario. The code is confused about memstall && running, thinking there is a stalled task and a productive task when there's only one task: a reclaimer that's counted as both. To fix this, we redefine the condition for PSI_MEM_FULL to check that all running tasks are in an active memstall instead of checking that there are no running tasks. case PSI_MEM_FULL: - return unlikely(tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]); + return unlikely(tasks[NR_MEMSTALL] && + tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]); This will capture reclaimers. It will also capture tasks that called psi_memstall_enter() and are about to sleep, but this should be negligible noise. Signed-off-by: Brian Chen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20211110213312.310243-1-brianchen118@gmail.com --- include/linux/psi_types.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index bf50068d5d4b..516c0fe836fd 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -22,7 +22,17 @@ enum psi_task_count { * don't have to special case any state tracking for it. */ NR_ONCPU, - NR_PSI_TASK_COUNTS = 4, + /* + * For IO and CPU stalls the presence of running/oncpu tasks + * in the domain means a partial rather than a full stall. + * For memory it's not so simple because of page reclaimers: + * they are running/oncpu while representing a stall. To tell + * whether a domain has productivity left or not, we need to + * distinguish between regular running (i.e. productive) + * threads and memstall ones. + */ + NR_MEMSTALL_RUNNING, + NR_PSI_TASK_COUNTS = 5, }; /* Task state bitmasks */ @@ -30,6 +40,7 @@ enum psi_task_count { #define TSK_MEMSTALL (1 << NR_MEMSTALL) #define TSK_RUNNING (1 << NR_RUNNING) #define TSK_ONCPU (1 << NR_ONCPU) +#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING) /* Resources that workloads could be stalled on */ enum psi_res { -- cgit v1.2.3 From ff083a2d972f56bebfd82409ca62e5dfce950961 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Nov 2021 02:07:22 +0000 Subject: perf: Protect perf_guest_cbs with RCU Protect perf_guest_cbs with RCU to fix multiple possible errors. Luckily, all paths that read perf_guest_cbs already require RCU protection, e.g. to protect the callback chains, so only the direct perf_guest_cbs touchpoints need to be modified. Bug #1 is a simple lack of WRITE_ONCE/READ_ONCE behavior to ensure perf_guest_cbs isn't reloaded between a !NULL check and a dereference. Fixed via the READ_ONCE() in rcu_dereference(). Bug #2 is that on weakly-ordered architectures, updates to the callbacks themselves are not guaranteed to be visible before the pointer is made visible to readers. Fixed by the smp_store_release() in rcu_assign_pointer() when the new pointer is non-NULL. Bug #3 is that, because the callbacks are global, it's possible for readers to run in parallel with an unregisters, and thus a module implementing the callbacks can be unloaded while readers are in flight, resulting in a use-after-free. Fixed by a synchronize_rcu() call when unregistering callbacks. Bug #1 escaped notice because it's extremely unlikely a compiler will reload perf_guest_cbs in this sequence. perf_guest_cbs does get reloaded for future derefs, e.g. for ->is_user_mode(), but the ->is_in_guest() guard all but guarantees the consumer will win the race, e.g. to nullify perf_guest_cbs, KVM has to completely exit the guest and teardown down all VMs before KVM start its module unload / unregister sequence. This also makes it all but impossible to encounter bug #3. Bug #2 has not been a problem because all architectures that register callbacks are strongly ordered and/or have a static set of callbacks. But with help, unloading kvm_intel can trigger bug #1 e.g. wrapping perf_guest_cbs with READ_ONCE in perf_misc_flags() while spamming kvm_intel module load/unload leads to: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP CPU: 6 PID: 1825 Comm: stress Not tainted 5.14.0-rc2+ #459 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:perf_misc_flags+0x1c/0x70 Call Trace: perf_prepare_sample+0x53/0x6b0 perf_event_output_forward+0x67/0x160 __perf_event_overflow+0x52/0xf0 handle_pmi_common+0x207/0x300 intel_pmu_handle_irq+0xcf/0x410 perf_event_nmi_handler+0x28/0x50 nmi_handle+0xc7/0x260 default_do_nmi+0x6b/0x170 exc_nmi+0x103/0x130 asm_exc_nmi+0x76/0xbf Fixes: 39447b386c84 ("perf: Enhance perf to allow for guest statistic collection from host") Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paolo Bonzini Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211111020738.2512932-2-seanjc@google.com --- include/linux/perf_event.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0dcfd265beed..318c489b735b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1240,7 +1240,18 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); -extern struct perf_guest_info_callbacks *perf_guest_cbs; +extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; +static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) +{ + /* + * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading + * the callbacks between a !NULL check and dereferences, to ensure + * pending stores/changes to the callback pointers are visible before a + * non-NULL perf_guest_cbs is visible to readers, and to prevent a + * module from unloading callbacks while readers are active. + */ + return rcu_dereference(perf_guest_cbs); +} extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); -- cgit v1.2.3 From 2934e3d09350c1a7ca2433fbeabfcd831e48a575 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Nov 2021 02:07:25 +0000 Subject: perf: Stop pretending that perf can handle multiple guest callbacks Drop the 'int' return value from the perf (un)register callbacks helpers and stop pretending perf can support multiple callbacks. The 'int' returns are not future proofing anything as none of the callers take action on an error. It's also not obvious that there will ever be co-tenant hypervisors, and if there are, that allowing multiple callbacks to be registered is desirable or even correct. Opportunistically rename callbacks=>cbs in the affected declarations to match their definitions. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paolo Bonzini Link: https://lore.kernel.org/r/20211111020738.2512932-5-seanjc@google.com --- include/linux/perf_event.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 318c489b735b..98c204488496 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1252,8 +1252,8 @@ static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) */ return rcu_dereference(perf_guest_cbs); } -extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); -extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); +extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); +extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); @@ -1497,10 +1497,10 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } static inline void perf_bp_event(struct perf_event *event, void *data) { } -static inline int perf_register_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } -static inline int perf_unregister_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } +static inline void perf_register_guest_info_callbacks +(struct perf_guest_info_callbacks *cbs) { } +static inline void perf_unregister_guest_info_callbacks +(struct perf_guest_info_callbacks *cbs) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } -- cgit v1.2.3 From b9f5621c9547dd787900f005a9e1c3d5712de512 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Thu, 11 Nov 2021 02:07:27 +0000 Subject: perf/core: Rework guest callbacks to prepare for static_call support To prepare for using static_calls to optimize perf's guest callbacks, replace ->is_in_guest and ->is_user_mode with a new multiplexed hook ->state, tweak ->handle_intel_pt_intr to play nice with being called when there is no active guest, and drop "guest" from ->get_guest_ip. Return '0' from ->state and ->handle_intel_pt_intr to indicate "not in guest" so that DEFINE_STATIC_CALL_RET0 can be used to define the static calls, i.e. no callback == !guest. [sean: extracted from static_call patch, fixed get_ip() bug, wrote changelog] Suggested-by: Peter Zijlstra (Intel) Originally-by: Peter Zijlstra (Intel) Signed-off-by: Like Xu Signed-off-by: Zhu Lingshan Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Boris Ostrovsky Reviewed-by: Paolo Bonzini Link: https://lore.kernel.org/r/20211111020738.2512932-7-seanjc@google.com --- include/linux/perf_event.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 98c204488496..5e6b346d62a7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -26,11 +26,13 @@ # include #endif +#define PERF_GUEST_ACTIVE 0x01 +#define PERF_GUEST_USER 0x02 + struct perf_guest_info_callbacks { - int (*is_in_guest)(void); - int (*is_user_mode)(void); - unsigned long (*get_guest_ip)(void); - void (*handle_intel_pt_intr)(void); + unsigned int (*state)(void); + unsigned long (*get_ip)(void); + unsigned int (*handle_intel_pt_intr)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT -- cgit v1.2.3 From 1c3430516b0732d923de9fd3bfb3e2e537eeb235 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Nov 2021 02:07:28 +0000 Subject: perf: Add wrappers for invoking guest callbacks Add helpers for the guest callbacks to prepare for burying the callbacks behind a Kconfig (it's a lot easier to provide a few stubs than to #ifdef piles of code), and also to prepare for converting the callbacks to static_call(). perf_instruction_pointer() in particular will have subtle semantics with static_call(), as the "no callbacks" case will return 0 if the callbacks are unregistered between querying guest state and getting the IP. Implement the change now to avoid a functional change when adding static_call() support, and because the new helper needs to return _something_ in this case. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paolo Bonzini Link: https://lore.kernel.org/r/20211111020738.2512932-8-seanjc@google.com --- include/linux/perf_event.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5e6b346d62a7..346d5aff5804 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1254,6 +1254,30 @@ static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) */ return rcu_dereference(perf_guest_cbs); } +static inline unsigned int perf_guest_state(void) +{ + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + return guest_cbs ? guest_cbs->state() : 0; +} +static inline unsigned long perf_guest_get_ip(void) +{ + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + /* + * Arbitrarily return '0' in the unlikely scenario that the callbacks + * are unregistered between checking guest state and getting the IP. + */ + return guest_cbs ? guest_cbs->get_ip() : 0; +} +static inline unsigned int perf_guest_handle_intel_pt_intr(void) +{ + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->handle_intel_pt_intr) + return guest_cbs->handle_intel_pt_intr(); + return 0; +} extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); -- cgit v1.2.3 From 2aef6f306b39bbe74e2287d6e2ee07c4867d87d0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Nov 2021 02:07:29 +0000 Subject: perf: Force architectures to opt-in to guest callbacks Introduce GUEST_PERF_EVENTS and require architectures to select it to allow registering and using guest callbacks in perf. This will hopefully make it more difficult for new architectures to add useless "support" for guest callbacks, e.g. via copy+paste. Stubbing out the helpers has the happy bonus of avoiding a load of perf_guest_cbs when GUEST_PERF_EVENTS=n on arm64/x86. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paolo Bonzini Link: https://lore.kernel.org/r/20211111020738.2512932-9-seanjc@google.com --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 346d5aff5804..ea47ef616ee0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1242,6 +1242,7 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); +#ifdef CONFIG_GUEST_PERF_EVENTS extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) { @@ -1280,6 +1281,11 @@ static inline unsigned int perf_guest_handle_intel_pt_intr(void) } extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); +#else +static inline unsigned int perf_guest_state(void) { return 0; } +static inline unsigned long perf_guest_get_ip(void) { return 0; } +static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; } +#endif /* CONFIG_GUEST_PERF_EVENTS */ extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); -- cgit v1.2.3 From 87b940a0675e25261f022ac3e53e0dfff9cdb995 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Nov 2021 02:07:30 +0000 Subject: perf/core: Use static_call to optimize perf_guest_info_callbacks Use static_call to optimize perf's guest callbacks on arm64 and x86, which are now the only architectures that define the callbacks. Use DEFINE_STATIC_CALL_RET0 as the default/NULL for all guest callbacks, as the callback semantics are that a return value '0' means "not in guest". static_call obviously avoids the overhead of CONFIG_RETPOLINE=y, but is also advantageous versus other solutions, e.g. per-cpu callbacks, in that a per-cpu memory load is not needed to detect the !guest case. Based on code from Peter and Like. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paolo Bonzini Link: https://lore.kernel.org/r/20211111020738.2512932-10-seanjc@google.com --- include/linux/perf_event.h | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ea47ef616ee0..0ac7d867ca0c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1244,40 +1244,22 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, #ifdef CONFIG_GUEST_PERF_EVENTS extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; -static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) -{ - /* - * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading - * the callbacks between a !NULL check and dereferences, to ensure - * pending stores/changes to the callback pointers are visible before a - * non-NULL perf_guest_cbs is visible to readers, and to prevent a - * module from unloading callbacks while readers are active. - */ - return rcu_dereference(perf_guest_cbs); -} + +DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); +DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip); +DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); + static inline unsigned int perf_guest_state(void) { - struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); - - return guest_cbs ? guest_cbs->state() : 0; + return static_call(__perf_guest_state)(); } static inline unsigned long perf_guest_get_ip(void) { - struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); - - /* - * Arbitrarily return '0' in the unlikely scenario that the callbacks - * are unregistered between checking guest state and getting the IP. - */ - return guest_cbs ? guest_cbs->get_ip() : 0; + return static_call(__perf_guest_get_ip)(); } static inline unsigned int perf_guest_handle_intel_pt_intr(void) { - struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); - - if (guest_cbs && guest_cbs->handle_intel_pt_intr) - return guest_cbs->handle_intel_pt_intr(); - return 0; + return static_call(__perf_guest_handle_intel_pt_intr)(); } extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); -- cgit v1.2.3 From e1bfc24577cc65c95dc519d7621a9c985b97e567 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Nov 2021 02:07:33 +0000 Subject: KVM: Move x86's perf guest info callbacks to generic KVM Move x86's perf guest callbacks into common KVM, as they are semantically identical to arm64's callbacks (the only other such KVM callbacks). arm64 will convert to the common versions in a future patch. Implement the necessary arm64 arch hooks now to avoid having to provide stubs or a temporary #define (from x86) to avoid arm64 compilation errors when CONFIG_GUEST_PERF_EVENTS=y. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paolo Bonzini Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20211111020738.2512932-13-seanjc@google.com --- include/linux/kvm_host.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9e0667e3723e..9df7ab2d7530 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1170,6 +1170,16 @@ static inline bool kvm_arch_intc_initialized(struct kvm *kvm) } #endif +#ifdef CONFIG_GUEST_PERF_EVENTS +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu); + +void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void)); +void kvm_unregister_perf_callbacks(void); +#else +static inline void kvm_register_perf_callbacks(void *ign) {} +static inline void kvm_unregister_perf_callbacks(void) {} +#endif /* CONFIG_GUEST_PERF_EVENTS */ + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); -- cgit v1.2.3 From be399d824b432a85f8df86b566d2e5994fdf58b0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Nov 2021 02:07:36 +0000 Subject: KVM: arm64: Hide kvm_arm_pmu_available behind CONFIG_HW_PERF_EVENTS=y Move the definition of kvm_arm_pmu_available to pmu-emul.c and, out of "necessity", hide it behind CONFIG_HW_PERF_EVENTS. Provide a stub for the key's wrapper, kvm_arm_support_pmu_v3(). Moving the key's definition out of perf.c will allow a future commit to delete perf.c entirely. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211111020738.2512932-16-seanjc@google.com --- include/kvm/arm_pmu.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 90f21898aad8..f9ed4c171d7b 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -13,13 +13,6 @@ #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) -DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); - -static __always_inline bool kvm_arm_support_pmu_v3(void) -{ - return static_branch_likely(&kvm_arm_pmu_available); -} - #ifdef CONFIG_HW_PERF_EVENTS struct kvm_pmc { @@ -36,6 +29,13 @@ struct kvm_pmu { struct irq_work overflow_work; }; +DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); + +static __always_inline bool kvm_arm_support_pmu_v3(void) +{ + return static_branch_likely(&kvm_arm_pmu_available); +} + #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); @@ -65,6 +65,11 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); struct kvm_pmu { }; +static inline bool kvm_arm_support_pmu_v3(void) +{ + return false; +} + #define kvm_arm_pmu_irq_initialized(v) (false) static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) -- cgit v1.2.3 From a9f4a6e92b3b319296fb078da2615f618f6cd80c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 11 Nov 2021 02:07:38 +0000 Subject: perf: Drop guest callback (un)register stubs Drop perf's stubs for (un)registering guest callbacks now that KVM registration of callbacks is hidden behind GUEST_PERF_EVENTS=y. The only other user is x86 XEN_PV, and x86 unconditionally selects PERF_EVENTS. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paolo Bonzini Link: https://lore.kernel.org/r/20211111020738.2512932-18-seanjc@google.com --- include/linux/perf_event.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0ac7d867ca0c..7b7525e9155f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1511,11 +1511,6 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } static inline void perf_bp_event(struct perf_event *event, void *data) { } -static inline void perf_register_guest_info_callbacks -(struct perf_guest_info_callbacks *cbs) { } -static inline void perf_unregister_guest_info_callbacks -(struct perf_guest_info_callbacks *cbs) { } - static inline void perf_event_mmap(struct vm_area_struct *vma) { } typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); -- cgit v1.2.3 From f45b2974cc0ae959a4c503a071e38a56bd64372f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 17 Nov 2021 13:57:08 +0100 Subject: bpf, x86: Fix "no previous prototype" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The arch_prepare_bpf_dispatcher function does not have a prototype, and yields the following warning when W=1 is enabled for the kernel build. >> arch/x86/net/bpf_jit_comp.c:2188:5: warning: no previous \ prototype for 'arch_prepare_bpf_dispatcher' [-Wmissing-prototypes] 2188 | int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, \ int num_funcs) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ Remove the warning by adding a function declaration to include/linux/bpf.h. Fixes: 75ccbef6369e ("bpf: Introduce BPF dispatcher") Reported-by: kernel test robot Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211117125708.769168-1-bjorn@kernel.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e7a163a3146b..84ff6ef49462 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -732,6 +732,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); +int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ -- cgit v1.2.3 From cf9acc90c80ecbee00334aa85d92f4e74014bcff Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Tue, 16 Nov 2021 17:42:42 +0000 Subject: net: virtio_net_hdr_to_skb: count transport header in UFO virtio_net_hdr_to_skb does not set the skb's gso_size and gso_type correctly for UFO packets received via virtio-net that are a little over the GSO size. This can lead to problems elsewhere in the networking stack, e.g. ovs_vport_send dropping over-sized packets if gso_size is not set. This is due to the comparison if (skb->len - p_off > gso_size) not properly accounting for the transport layer header. p_off includes the size of the transport layer header (thlen), so skb->len - p_off is the size of the TCP/UDP payload. gso_size is read from the virtio-net header. For UFO, fragmentation happens at the IP level so does not need to include the UDP header. Hence the calculation could be comparing a TCP/UDP payload length with an IP payload length, causing legitimate virtio-net packets to have lack gso_type/gso_size information. Example: a UDP packet with payload size 1473 has IP payload size 1481. If the guest used UFO, it is not fragmented and the virtio-net header's flags indicate that it is a GSO frame (VIRTIO_NET_HDR_GSO_UDP), with gso_size = 1480 for an MTU of 1500. skb->len will be 1515 and p_off will be 42, so skb->len - p_off = 1473. Hence the comparison fails, and shinfo->gso_size and gso_type are not set as they should be. Instead, add the UDP header length before comparing to gso_size when using UFO. In this way, it is the size of the IP payload that is compared to gso_size. Fixes: 6dd912f82680 ("net: check untrusted gso_size at kernel entry") Signed-off-by: Jonathan Davies Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index b465f8f3e554..04e87f4b9417 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -120,10 +120,15 @@ retry: if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); + /* UFO may not include transport header in gso_size. */ + if (gso_type & SKB_GSO_UDP) + nh_off -= thlen; + /* Too small packets are not really GSO ones. */ - if (skb->len - p_off > gso_size) { + if (skb->len - nh_off > gso_size) { shinfo->gso_size = gso_size; shinfo->gso_type = gso_type; -- cgit v1.2.3 From 8160fb43d55d26d64607fd32fe69185a5f5fe41f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Nov 2021 19:29:21 -0800 Subject: net: use an atomic_long_t for queue->trans_timeout tx_timeout_show() assumed dev_watchdog() would stop all the queues, to fetch queue->trans_timeout under protection of the queue->_xmit_lock. As we want to no longer disrupt transmits, we use an atomic_long_t instead. Signed-off-by: Eric Dumazet Cc: david decotigny Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dd328364dfe9..1d22483cf78c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -592,7 +592,7 @@ struct netdev_queue { * Number of TX timeouts for this queue * (/sys/class/net/DEV/Q/trans_timeout) */ - unsigned long trans_timeout; + atomic_long_t trans_timeout; /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; -- cgit v1.2.3 From 5337824f4dc4bb26f38fbbba4ffb425a92803f15 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Nov 2021 19:29:22 -0800 Subject: net: annotate accesses to queue->trans_start In following patches, dev_watchdog() will no longer stop all queues. It will read queue->trans_start locklessly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1d22483cf78c..279409ef2b18 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4095,10 +4095,21 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) spin_unlock_bh(&txq->_xmit_lock); } +/* + * txq->trans_start can be read locklessly from dev_watchdog() + */ static inline void txq_trans_update(struct netdev_queue *txq) { if (txq->xmit_lock_owner != -1) - txq->trans_start = jiffies; + WRITE_ONCE(txq->trans_start, jiffies); +} + +static inline void txq_trans_cond_update(struct netdev_queue *txq) +{ + unsigned long now = jiffies; + + if (READ_ONCE(txq->trans_start) != now) + WRITE_ONCE(txq->trans_start, now); } /* legacy drivers only, netdev_start_xmit() sets txq->trans_start */ @@ -4106,8 +4117,7 @@ static inline void netif_trans_update(struct net_device *dev) { struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); - if (txq->trans_start != jiffies) - txq->trans_start = jiffies; + txq_trans_cond_update(txq); } /** -- cgit v1.2.3 From dab8fe320726b38a6b1dc6a7ca6e386c5f7779e8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Nov 2021 19:29:23 -0800 Subject: net: do not inline netif_tx_lock()/netif_tx_unlock() These are not fast path, there is no point in inlining them. Also provide netif_freeze_queues()/netif_unfreeze_queues() so that we can use them from dev_watchdog() in the following patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 39 ++------------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 279409ef2b18..4f4a299e92de 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4126,27 +4126,7 @@ static inline void netif_trans_update(struct net_device *dev) * * Get network device transmit lock */ -static inline void netif_tx_lock(struct net_device *dev) -{ - unsigned int i; - int cpu; - - spin_lock(&dev->tx_global_lock); - cpu = smp_processor_id(); - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - /* We are the only thread of execution doing a - * freeze, but we have to grab the _xmit_lock in - * order to synchronize with threads which are in - * the ->hard_start_xmit() handler and already - * checked the frozen bit. - */ - __netif_tx_lock(txq, cpu); - set_bit(__QUEUE_STATE_FROZEN, &txq->state); - __netif_tx_unlock(txq); - } -} +void netif_tx_lock(struct net_device *dev); static inline void netif_tx_lock_bh(struct net_device *dev) { @@ -4154,22 +4134,7 @@ static inline void netif_tx_lock_bh(struct net_device *dev) netif_tx_lock(dev); } -static inline void netif_tx_unlock(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - /* No need to grab the _xmit_lock here. If the - * queue is not stopped for another reason, we - * force a schedule. - */ - clear_bit(__QUEUE_STATE_FROZEN, &txq->state); - netif_schedule_queue(txq); - } - spin_unlock(&dev->tx_global_lock); -} +void netif_tx_unlock(struct net_device *dev); static inline void netif_tx_unlock_bh(struct net_device *dev) { -- cgit v1.2.3 From 1881eadb2041889d74d60c074eb04189c4a07dad Mon Sep 17 00:00:00 2001 From: Abhyuday Godhasara Date: Mon, 25 Oct 2021 21:25:20 -0700 Subject: firmware: xilinx: add register notifier in zynqmp firmware In zynqmp-firmware, register notifier is not supported, add support of register notifier in zynqmp-firmware. Acked-by: Michal Simek Signed-off-by: Tejas Patel Signed-off-by: Abhyuday Godhasara Link: https://lore.kernel.org/r/20211026042525.26612-2-abhyuday.godhasara@xilinx.com Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/xlnx-zynqmp.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 47fd4e52a423..d30d39dc8cb4 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -2,7 +2,7 @@ /* * Xilinx Zynq MPSoC Firmware layer * - * Copyright (C) 2014-2019 Xilinx + * Copyright (C) 2014-2021 Xilinx * * Michal Simek * Davorin Mista @@ -66,6 +66,7 @@ enum pm_api_id { PM_GET_API_VERSION = 1, + PM_REGISTER_NOTIFIER = 5, PM_SYSTEM_SHUTDOWN = 12, PM_REQUEST_NODE = 13, PM_RELEASE_NODE = 14, @@ -427,6 +428,8 @@ int zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param, int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param, u32 value); int zynqmp_pm_load_pdi(const u32 src, const u64 address); +int zynqmp_pm_register_notifier(const u32 node, const u32 event, + const u32 wake, const u32 enable); #else static inline int zynqmp_pm_get_api_version(u32 *version) { @@ -658,6 +661,12 @@ static inline int zynqmp_pm_load_pdi(const u32 src, const u64 address) { return -ENODEV; } + +static inline int zynqmp_pm_register_notifier(const u32 node, const u32 event, + const u32 wake, const u32 enable) +{ + return -ENODEV; +} #endif #endif /* __FIRMWARE_ZYNQMP_H__ */ -- cgit v1.2.3 From fbce9f14055e547d270046f61758c29c957e675d Mon Sep 17 00:00:00 2001 From: Abhyuday Godhasara Date: Mon, 25 Oct 2021 21:25:21 -0700 Subject: firmware: xilinx: add macros of node ids for error event Add macros for the Node-Id of Error events. Move supported api callback ids from zynqmp-power to zynqmp-firmware. Acked-by: Michal Simek Signed-off-by: Rajan Vaja Signed-off-by: Abhyuday Godhasara Link: https://lore.kernel.org/r/20211026042525.26612-3-abhyuday.godhasara@xilinx.com Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/xlnx-zynqmp.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index d30d39dc8cb4..b0a38091db71 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -64,6 +64,20 @@ #define XILINX_ZYNQMP_PM_FPGA_FULL 0x0U #define XILINX_ZYNQMP_PM_FPGA_PARTIAL BIT(0) +/* + * Node IDs for the Error Events. + */ +#define EVENT_ERROR_PMC_ERR1 (0x28100000U) +#define EVENT_ERROR_PMC_ERR2 (0x28104000U) +#define EVENT_ERROR_PSM_ERR1 (0x28108000U) +#define EVENT_ERROR_PSM_ERR2 (0x2810C000U) + +enum pm_api_cb_id { + PM_INIT_SUSPEND_CB = 30, + PM_ACKNOWLEDGE_CB = 31, + PM_NOTIFY_CB = 32, +}; + enum pm_api_id { PM_GET_API_VERSION = 1, PM_REGISTER_NOTIFIER = 5, -- cgit v1.2.3 From f4d77525679e289d4976ca03b620ac4cc5403205 Mon Sep 17 00:00:00 2001 From: Abhyuday Godhasara Date: Mon, 25 Oct 2021 21:25:22 -0700 Subject: firmware: xilinx: export the feature check of zynqmp firmware Export the zynqmp_pm_feature(), so it can be use by other as to get API version available in firmware. Acked-by: Michal Simek Signed-off-by: Rajan Vaja Signed-off-by: Abhyuday Godhasara Link: https://lore.kernel.org/r/20211026042525.26612-4-abhyuday.godhasara@xilinx.com Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/xlnx-zynqmp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index b0a38091db71..077e894bb340 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -444,6 +444,7 @@ int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param, int zynqmp_pm_load_pdi(const u32 src, const u64 address); int zynqmp_pm_register_notifier(const u32 node, const u32 event, const u32 wake, const u32 enable); +int zynqmp_pm_feature(const u32 api_id); #else static inline int zynqmp_pm_get_api_version(u32 *version) { @@ -681,6 +682,11 @@ static inline int zynqmp_pm_register_notifier(const u32 node, const u32 event, { return -ENODEV; } + +static inline int zynqmp_pm_feature(const u32 api_id) +{ + return -ENODEV; +} #endif #endif /* __FIRMWARE_ZYNQMP_H__ */ -- cgit v1.2.3 From 522a0032af005502507f5f81ae64fdcc82b5d068 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 6 Nov 2021 17:13:35 -0400 Subject: Add linux/cacheflush.h Many architectures do not include asm-generic/cacheflush.h, so turn the includes on their head and add linux/cacheflush.h which includes asm/cacheflush.h. Move the flush_dcache_folio() declaration from asm-generic/cacheflush.h to linux/cacheflush.h and change linux/highmem.h to include linux/cacheflush.h instead of asm/cacheflush.h so that all necessary places will see flush_dcache_folio(). More functions should have their default implementations moved in the future, but those are for follow-on patches. This fixes csky, sparc and sparc64 which were missed in the commit which added flush_dcache_folio(). Fixes: 08b0b0059bf1 ("mm: Add flush_dcache_folio()") Suggested-by: Christoph Hellwig Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Geert Uytterhoeven --- include/asm-generic/cacheflush.h | 6 ------ include/linux/cacheflush.h | 18 ++++++++++++++++++ include/linux/highmem.h | 3 +-- 3 files changed, 19 insertions(+), 8 deletions(-) create mode 100644 include/linux/cacheflush.h (limited to 'include') diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h index fedc0dfa4877..4f07afacbc23 100644 --- a/include/asm-generic/cacheflush.h +++ b/include/asm-generic/cacheflush.h @@ -50,13 +50,7 @@ static inline void flush_dcache_page(struct page *page) { } -static inline void flush_dcache_folio(struct folio *folio) { } #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO -#endif - -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO -void flush_dcache_folio(struct folio *folio); #endif #ifndef flush_dcache_mmap_lock diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h new file mode 100644 index 000000000000..fef8b607f97e --- /dev/null +++ b/include/linux/cacheflush.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CACHEFLUSH_H +#define _LINUX_CACHEFLUSH_H + +#include + +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +void flush_dcache_folio(struct folio *folio); +#endif +#else +static inline void flush_dcache_folio(struct folio *folio) +{ +} +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0 +#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */ + +#endif /* _LINUX_CACHEFLUSH_H */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 25aff0f2ed0b..c944b3b70ee7 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -5,12 +5,11 @@ #include #include #include +#include #include #include #include -#include - #include "highmem-internal.h" /** -- cgit v1.2.3 From 9c3252152e8a6401c2b9e32490a5a16ec4472778 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Nov 2021 21:17:14 -0500 Subject: mm: Rename folio_test_multi to folio_test_large This is a better name. Also add kernel-doc. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/page-flags.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 52ec4b5e5615..05510118fbb8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -692,7 +692,13 @@ static inline bool folio_test_single(struct folio *folio) return !folio_test_head(folio); } -static inline bool folio_test_multi(struct folio *folio) +/** + * folio_test_large() - Does this folio contain more than one page? + * @folio: The folio to test. + * + * Return: True if the folio is larger than one page. + */ +static inline bool folio_test_large(struct folio *folio) { return folio_test_head(folio); } -- cgit v1.2.3 From a1efe484dd8c04c4c2d4eb1ee6b04d01cfc07ccc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Nov 2021 21:18:52 -0500 Subject: mm: Remove folio_test_single There's no need for this predicate; callers can just use !folio_test_large(). Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/page-flags.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 05510118fbb8..b5f14d581113 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -686,12 +686,6 @@ static inline bool test_set_page_writeback(struct page *page) __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) -/* Whether there are one or multiple pages in a folio */ -static inline bool folio_test_single(struct folio *folio) -{ - return !folio_test_head(folio); -} - /** * folio_test_large() - Does this folio contain more than one page? * @folio: The folio to test. -- cgit v1.2.3 From ff36da69bc90d80b0c73f47f4b2e270b3ff6da99 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 29 Aug 2021 06:07:03 -0400 Subject: fs: Remove FS_THP_SUPPORT Instead of setting a bit in the fs_flags to set a bit in the address_space, set the bit in the address_space directly. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- include/linux/fs.h | 1 - include/linux/pagemap.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1cb616fc1105..bbf812ce89a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2518,7 +2518,6 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ -#define FS_THP_SUPPORT 8192 /* Remove once all fs converted */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 1a0c646eb6ff..9e33878bf23b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -176,6 +176,22 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) m->gfp_mask = mask; } +/** + * mapping_set_large_folios() - Indicate the file supports large folios. + * @mapping: The file. + * + * The filesystem should call this function in its inode constructor to + * indicate that the VFS can use large folios to cache the contents of + * the file. + * + * Context: This should not be called while the inode is active as it + * is non-atomic. + */ +static inline void mapping_set_large_folios(struct address_space *mapping) +{ + __set_bit(AS_THP_SUPPORT, &mapping->flags); +} + static inline bool mapping_thp_support(struct address_space *mapping) { return test_bit(AS_THP_SUPPORT, &mapping->flags); -- cgit v1.2.3 From ed2145c474c9015bc634e35f6d1a9b7767f3fbfc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 29 Aug 2021 06:28:19 -0400 Subject: fs: Rename AS_THP_SUPPORT and mapping_thp_support These are now indicators of large folio support, not THP support. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/pagemap.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 9e33878bf23b..605246452305 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -84,7 +84,7 @@ enum mapping_flags { AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, - AS_THP_SUPPORT = 6, /* THPs supported */ + AS_LARGE_FOLIO_SUPPORT = 6, }; /** @@ -189,12 +189,12 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) */ static inline void mapping_set_large_folios(struct address_space *mapping) { - __set_bit(AS_THP_SUPPORT, &mapping->flags); + __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } -static inline bool mapping_thp_support(struct address_space *mapping) +static inline bool mapping_large_folio_support(struct address_space *mapping) { - return test_bit(AS_THP_SUPPORT, &mapping->flags); + return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } static inline int filemap_nr_thps(struct address_space *mapping) @@ -209,7 +209,7 @@ static inline int filemap_nr_thps(struct address_space *mapping) static inline void filemap_nr_thps_inc(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_thp_support(mapping)) + if (!mapping_large_folio_support(mapping)) atomic_inc(&mapping->nr_thps); #else WARN_ON_ONCE(1); @@ -219,7 +219,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping) static inline void filemap_nr_thps_dec(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_thp_support(mapping)) + if (!mapping_large_folio_support(mapping)) atomic_dec(&mapping->nr_thps); #else WARN_ON_ONCE(1); -- cgit v1.2.3 From 75082e7f46809432131749f4ecea66864d0f7438 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Nov 2021 02:01:30 -0800 Subject: net: add missing include in include/net/gro.h This is needed for some arches, as reported by Geert Uytterhoeven, Randy Dunlap and Stephen Rothwell Fixes: 4721031c3559 ("net: move gro definitions to include/net/gro.h") Signed-off-by: Eric Dumazet Cc: Stephen Rothwell Cc: Randy Dunlap Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211117100130.2368319-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/gro.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index d0e7df691a80..9c22a010369c 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 5768d8906bc23d512b1a736c1e198aa833a6daa4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 15 Nov 2021 13:47:13 -0600 Subject: signal: Requeue signals in the appropriate queue In the event that a tracer changes which signal needs to be delivered and that signal is currently blocked then the signal needs to be requeued for later delivery. With the advent of CLONE_THREAD the kernel has 2 signal queues per task. The per process queue and the per task queue. Update the code so that if the signal is removed from the per process queue it is requeued on the per process queue. This is necessary to make it appear the signal was never dequeued. The rr debugger reasonably believes that the state of the process from the last ptrace_stop it observed until PTRACE_EVENT_EXIT can be recreated by simply letting a process run. If a SIGKILL interrupts a ptrace_stop this is not true today. So return signals to their original queue in ptrace_signal so that signals that are not delivered appear like they were never dequeued. Fixes: 794aa320b79d ("[PATCH] sigfix-2.5.40-D6") History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.gi Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/87zgq4d5r4.fsf_-_@email.froward.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 23505394ef70..167995d471da 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -286,17 +286,18 @@ static inline int signal_group_exit(const struct signal_struct *sig) extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *task, - sigset_t *mask, kernel_siginfo_t *info); +extern int dequeue_signal(struct task_struct *task, sigset_t *mask, + kernel_siginfo_t *info, enum pid_type *type); static inline int kernel_dequeue_signal(void) { struct task_struct *task = current; kernel_siginfo_t __info; + enum pid_type __type; int ret; spin_lock_irq(&task->sighand->siglock); - ret = dequeue_signal(task, &task->blocked, &__info); + ret = dequeue_signal(task, &task->blocked, &__info, &__type); spin_unlock_irq(&task->sighand->siglock); return ret; -- cgit v1.2.3 From e0dbd7b0ed021fb9250f7ba4d759325678efefb5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 Nov 2021 23:44:28 +0100 Subject: power: supply: core: Add kerneldoc to battery struct This complements the struct power_supply_battery_info with extensive kerneldoc explaining the different semantics of the fields, including an overview of the CC/CV charging concepts implicit in some of the struct members. This is done to first establish semantics before I can add more charging methods by breaking out the CC/CV parameters to its own struct. Tested-by: Randy Dunlap Acked-by: Randy Dunlap Reviewed-by: Matti Vaittinen Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 215 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 192 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 2d1318fe2455..f6e94eae4f28 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -343,37 +343,206 @@ struct power_supply_resistance_temp_table { #define POWER_SUPPLY_OCV_TEMP_MAX 20 -/* +/** + * struct power_supply_battery_info - information about batteries + * @technology: from the POWER_SUPPLY_TECHNOLOGY_* enum + * @energy_full_design_uwh: energy content when fully charged in microwatt + * hours + * @charge_full_design_uah: charge content when fully charged in microampere + * hours + * @voltage_min_design_uv: minimum voltage across the poles when the battery + * is at minimum voltage level in microvolts. If the voltage drops below this + * level the battery will need precharging when using CC/CV charging. + * @voltage_max_design_uv: voltage across the poles when the battery is fully + * charged in microvolts. This is the "nominal voltage" i.e. the voltage + * printed on the label of the battery. + * @tricklecharge_current_ua: the tricklecharge current used when trickle + * charging the battery in microamperes. This is the charging phase when the + * battery is completely empty and we need to carefully trickle in some + * charge until we reach the precharging voltage. + * @precharge_current_ua: current to use in the precharge phase in microamperes, + * the precharge rate is limited by limiting the current to this value. + * @precharge_voltage_max_uv: the maximum voltage allowed when precharging in + * microvolts. When we pass this voltage we will nominally switch over to the + * CC (constant current) charging phase defined by constant_charge_current_ua + * and constant_charge_voltage_max_uv. + * @charge_term_current_ua: when the current in the CV (constant voltage) + * charging phase drops below this value in microamperes the charging will + * terminate completely and not restart until the voltage over the battery + * poles reach charge_restart_voltage_uv unless we use maintenance charging. + * @charge_restart_voltage_uv: when the battery has been fully charged by + * CC/CV charging and charging has been disabled, and the voltage subsequently + * drops below this value in microvolts, the charging will be restarted + * (typically using CV charging). + * @overvoltage_limit_uv: If the voltage exceeds the nominal voltage + * voltage_max_design_uv and we reach this voltage level, all charging must + * stop and emergency procedures take place, such as shutting down the system + * in some cases. + * @constant_charge_current_max_ua: current in microamperes to use in the CC + * (constant current) charging phase. The charging rate is limited + * by this current. This is the main charging phase and as the current is + * constant into the battery the voltage slowly ascends to + * constant_charge_voltage_max_uv. + * @constant_charge_voltage_max_uv: voltage in microvolts signifying the end of + * the CC (constant current) charging phase and the beginning of the CV + * (constant voltage) charging phase. + * @factory_internal_resistance_uohm: the internal resistance of the battery + * at fabrication time, expressed in microohms. This resistance will vary + * depending on the lifetime and charge of the battery, so this is just a + * nominal ballpark figure. + * @ocv_temp: array indicating the open circuit voltage (OCV) capacity + * temperature indices. This is an array of temperatures in degrees Celsius + * indicating which capacity table to use for a certain temperature, since + * the capacity for reasons of chemistry will be different at different + * temperatures. Determining capacity is a multivariate problem and the + * temperature is the first variable we determine. + * @temp_ambient_alert_min: the battery will go outside of operating conditions + * when the ambient temperature goes below this temperature in degrees + * Celsius. + * @temp_ambient_alert_max: the battery will go outside of operating conditions + * when the ambient temperature goes above this temperature in degrees + * Celsius. + * @temp_alert_min: the battery should issue an alert if the internal + * temperature goes below this temperature in degrees Celsius. + * @temp_alert_max: the battery should issue an alert if the internal + * temperature goes above this temperature in degrees Celsius. + * @temp_min: the battery will go outside of operating conditions when + * the internal temperature goes below this temperature in degrees Celsius. + * Normally this means the system should shut down. + * @temp_max: the battery will go outside of operating conditions when + * the internal temperature goes above this temperature in degrees Celsius. + * Normally this means the system should shut down. + * @ocv_table: for each entry in ocv_temp there is a corresponding entry in + * ocv_table and a size for each entry in ocv_table_size. These arrays + * determine the capacity in percent in relation to the voltage in microvolts + * at the indexed temperature. + * @ocv_table_size: for each entry in ocv_temp this array is giving the size of + * each entry in the array of capacity arrays in ocv_table. + * @resist_table: this is a table that correlates a battery temperature to the + * expected internal resistance at this temperature. The resistance is given + * as a percentage of factory_internal_resistance_uohm. Knowing the + * resistance of the battery is usually necessary for calculating the open + * circuit voltage (OCV) that is then used with the ocv_table to calculate + * the capacity of the battery. The resist_table must be ordered descending + * by temperature: highest temperature with lowest resistance first, lowest + * temperature with highest resistance last. + * @resist_table_size: the number of items in the resist_table. + * * This is the recommended struct to manage static battery parameters, * populated by power_supply_get_battery_info(). Most platform drivers should * use these for consistency. + * * Its field names must correspond to elements in enum power_supply_property. * The default field value is -EINVAL. - * Power supply class itself doesn't use this. + * + * The charging parameters here assume a CC/CV charging scheme. This method + * is most common with Lithium Ion batteries (other methods are possible) and + * looks as follows: + * + * ^ Battery voltage + * | --- overvoltage_limit_uv + * | + * | ................................................... + * | .. constant_charge_voltage_max_uv + * | .. + * | . + * | . + * | . + * | . + * | . + * | .. precharge_voltage_max_uv + * | .. + * |. (trickle charging) + * +------------------------------------------------------------------> time + * + * ^ Current into the battery + * | + * | ............. constant_charge_current_max_ua + * | . . + * | . . + * | . . + * | . . + * | . .. + * | . .... + * | . ..... + * | ... precharge_current_ua ....... charge_term_current_ua + * | . . + * | . . + * |.... tricklecharge_current_ua . + * | . + * +-----------------------------------------------------------------> time + * + * These diagrams are synchronized on time and the voltage and current + * follow each other. + * + * With CC/CV charging commence over time like this for an empty battery: + * + * 1. When the battery is completely empty it may need to be charged with + * an especially small current so that electrons just "trickle in", + * this is the tricklecharge_current_ua. + * + * 2. Next a small initial pre-charge current (precharge_current_ua) + * is applied if the voltage is below precharge_voltage_max_uv until we + * reach precharge_voltage_max_uv. CAUTION: in some texts this is referred + * to as "trickle charging" but the use in the Linux kernel is different + * see below! + * + * 3. Then the main charging current is applied, which is called the constant + * current (CC) phase. A current regulator is set up to allow + * constant_charge_current_max_ua of current to flow into the battery. + * The chemical reaction in the battery will make the voltage go up as + * charge goes into the battery. This current is applied until we reach + * the constant_charge_voltage_max_uv voltage. + * + * 4. At this voltage we switch over to the constant voltage (CV) phase. This + * means we allow current to go into the battery, but we keep the voltage + * fixed. This current will continue to charge the battery while keeping + * the voltage the same. A chemical reaction in the battery goes on + * storing energy without affecting the voltage. Over time the current + * will slowly drop and when we reach charge_term_current_ua we will + * end the constant voltage phase. + * + * After this the battery is fully charged, and if we do not support maintenance + * charging, the charging will not restart until power dissipation makes the + * voltage fall so that we reach charge_restart_voltage_uv and at this point + * we restart charging at the appropriate phase, usually this will be inside + * the CV phase. + * + * If we support maintenance charging the voltage is however kept high after + * the CV phase with a very low current. This is meant to let the same charge + * go in for usage while the charger is still connected, mainly for + * dissipation for the power consuming entity while connected to the + * charger. + * + * All charging MUST terminate if the overvoltage_limit_uv is ever reached. + * Overcharging Lithium Ion cells can be DANGEROUS and lead to fire or + * explosions. + * + * The power supply class itself doesn't use this struct as of now. */ struct power_supply_battery_info { - unsigned int technology; /* from the enum above */ - int energy_full_design_uwh; /* microWatt-hours */ - int charge_full_design_uah; /* microAmp-hours */ - int voltage_min_design_uv; /* microVolts */ - int voltage_max_design_uv; /* microVolts */ - int tricklecharge_current_ua; /* microAmps */ - int precharge_current_ua; /* microAmps */ - int precharge_voltage_max_uv; /* microVolts */ - int charge_term_current_ua; /* microAmps */ - int charge_restart_voltage_uv; /* microVolts */ - int overvoltage_limit_uv; /* microVolts */ - int constant_charge_current_max_ua; /* microAmps */ - int constant_charge_voltage_max_uv; /* microVolts */ - int factory_internal_resistance_uohm; /* microOhms */ - int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX];/* celsius */ - int temp_ambient_alert_min; /* celsius */ - int temp_ambient_alert_max; /* celsius */ - int temp_alert_min; /* celsius */ - int temp_alert_max; /* celsius */ - int temp_min; /* celsius */ - int temp_max; /* celsius */ + unsigned int technology; + int energy_full_design_uwh; + int charge_full_design_uah; + int voltage_min_design_uv; + int voltage_max_design_uv; + int tricklecharge_current_ua; + int precharge_current_ua; + int precharge_voltage_max_uv; + int charge_term_current_ua; + int charge_restart_voltage_uv; + int overvoltage_limit_uv; + int constant_charge_current_max_ua; + int constant_charge_voltage_max_uv; + int factory_internal_resistance_uohm; + int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX]; + int temp_ambient_alert_min; + int temp_ambient_alert_max; + int temp_alert_min; + int temp_alert_max; + int temp_min; + int temp_max; struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; struct power_supply_resistance_temp_table *resist_table; -- cgit v1.2.3 From efb931cdc4b94a0f7ed17a76844f08cef1bdffe5 Mon Sep 17 00:00:00 2001 From: Ajit Kumar Pandey Date: Wed, 17 Nov 2021 11:37:24 +0200 Subject: ASoC: SOF: topology: Add support for AMD ACP DAIs Add new sof dais and config to pass topology file configuration to SOF firmware running on ACP's DSP core. ACP firmware support I2S_BT, I2S_SP and DMIC controller hence add three new dais to the list of supported sof_dais Signed-off-by: Ajit Kumar Pandey Reviewed-by: Bard Liao Reviewed-by: Kai Vehmanen Signed-off-by: Daniel Baluta Link: https://lore.kernel.org/r/20211117093734.17407-12-daniel.baluta@oss.nxp.com Signed-off-by: Mark Brown --- include/sound/sof/dai-amd.h | 21 +++++++++++++++++++++ include/sound/sof/dai.h | 7 +++++++ 2 files changed, 28 insertions(+) create mode 100644 include/sound/sof/dai-amd.h (limited to 'include') diff --git a/include/sound/sof/dai-amd.h b/include/sound/sof/dai-amd.h new file mode 100644 index 000000000000..90d09dbdd709 --- /dev/null +++ b/include/sound/sof/dai-amd.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * Copyright(c) 2021 Advanced Micro Devices, Inc.. All rights reserved. + */ + +#ifndef __INCLUDE_SOUND_SOF_DAI_AMD_H__ +#define __INCLUDE_SOUND_SOF_DAI_AMD_H__ + +#include + +/* ACP Configuration Request - SOF_IPC_DAI_AMD_CONFIG */ +struct sof_ipc_dai_acp_params { + struct sof_ipc_hdr hdr; + + uint32_t fsync_rate; /* FSYNC frequency in Hz */ + uint32_t tdm_slots; +} __packed; +#endif diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 9625f47557b8..3782127a7095 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -12,6 +12,7 @@ #include #include #include +#include /* * DAI Configuration. @@ -66,6 +67,9 @@ enum sof_ipc_dai_type { SOF_DAI_INTEL_ALH, /**< Intel ALH */ SOF_DAI_IMX_SAI, /**< i.MX SAI */ SOF_DAI_IMX_ESAI, /**< i.MX ESAI */ + SOF_DAI_AMD_BT, /**< AMD ACP BT*/ + SOF_DAI_AMD_SP, /**< AMD ACP SP */ + SOF_DAI_AMD_DMIC, /**< AMD ACP DMIC */ }; /* general purpose DAI configuration */ @@ -90,6 +94,9 @@ struct sof_ipc_dai_config { struct sof_ipc_dai_alh_params alh; struct sof_ipc_dai_esai_params esai; struct sof_ipc_dai_sai_params sai; + struct sof_ipc_dai_acp_params acpbt; + struct sof_ipc_dai_acp_params acpsp; + struct sof_ipc_dai_acp_params acpdmic; }; } __packed; -- cgit v1.2.3 From 6bb835f3d00467c9a5e35f4955afa29df96a404e Mon Sep 17 00:00:00 2001 From: Andriy Tryshnivskyy Date: Sun, 24 Oct 2021 12:16:26 +0300 Subject: iio: core: Introduce IIO_VAL_INT_64. Introduce IIO_VAL_INT_64 to read 64-bit value for channel attribute. Val is used as lower 32 bits. Signed-off-by: Andriy Tryshnivskyy Link: https://lore.kernel.org/r/20211024091627.28031-2-andriy.tryshnivskyy@opensynergy.com Signed-off-by: Jonathan Cameron --- include/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 84b3f8175cc6..a7aa91f3a8dc 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -24,6 +24,7 @@ enum iio_event_info { #define IIO_VAL_INT_PLUS_NANO 3 #define IIO_VAL_INT_PLUS_MICRO_DB 4 #define IIO_VAL_INT_MULTIPLE 5 +#define IIO_VAL_INT_64 6 /* 64-bit data, val is lower 32 bits */ #define IIO_VAL_FRACTIONAL 10 #define IIO_VAL_FRACTIONAL_LOG2 11 #define IIO_VAL_CHAR 12 -- cgit v1.2.3 From b5f57384805a34f497edb8b04d694a8a1b3d81d4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 10 Sep 2021 15:18:20 -0400 Subject: drm/amdkfd: Add sysfs bitfields and enums to uAPI These bits are de-facto part of the uAPI, so declare them in a uAPI header. The corresponding bit-fields and enums in user mode are defined in https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/master/include/hsakmttypes.h HSA_CAP_... -> HSA_CAPABILITY HSA_MEM_HEAP_TYPE_... -> HSA_HEAPTYPE HSA_MEM_FLAGS_... -> HSA_MEMORYPROPERTY HSA_CACHE_TYPE_... -> HsaCacheType HSA_IOLINK_TYPE_... -> HSA_IOLINKTYPE HSA_IOLINK_FLAGS_... -> HSA_LINKPROPERTY Signed-off-by: Felix Kuehling Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_sysfs.h | 108 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 include/uapi/linux/kfd_sysfs.h (limited to 'include') diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h new file mode 100644 index 000000000000..e1fb78b4bf09 --- /dev/null +++ b/include/uapi/linux/kfd_sysfs.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT WITH Linux-syscall-note */ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_SYSFS_H_INCLUDED +#define KFD_SYSFS_H_INCLUDED + +/* Capability bits in node properties */ +#define HSA_CAP_HOT_PLUGGABLE 0x00000001 +#define HSA_CAP_ATS_PRESENT 0x00000002 +#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004 +#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008 +#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010 +#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020 +#define HSA_CAP_VA_LIMIT 0x00000040 +#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080 +#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 +#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12 + +#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 +#define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 +#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 + +/* Old buggy user mode depends on this being 0 */ +#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 + +#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 +#define HSA_CAP_RASEVENTNOTIFY 0x00200000 +#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 +#define HSA_CAP_ASIC_REVISION_SHIFT 22 +#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 +#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 +#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 +#define HSA_CAP_RESERVED 0xe00f8000 + +/* Heap types in memory properties */ +#define HSA_MEM_HEAP_TYPE_SYSTEM 0 +#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1 +#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2 +#define HSA_MEM_HEAP_TYPE_GPU_GDS 3 +#define HSA_MEM_HEAP_TYPE_GPU_LDS 4 +#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 + +/* Flag bits in memory properties */ +#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 +#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 +#define HSA_MEM_FLAGS_RESERVED 0xfffffffc + +/* Cache types in cache properties */ +#define HSA_CACHE_TYPE_DATA 0x00000001 +#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002 +#define HSA_CACHE_TYPE_CPU 0x00000004 +#define HSA_CACHE_TYPE_HSACU 0x00000008 +#define HSA_CACHE_TYPE_RESERVED 0xfffffff0 + +/* Link types in IO link properties (matches CRAT link types) */ +#define HSA_IOLINK_TYPE_UNDEFINED 0 +#define HSA_IOLINK_TYPE_HYPERTRANSPORT 1 +#define HSA_IOLINK_TYPE_PCIEXPRESS 2 +#define HSA_IOLINK_TYPE_AMBA 3 +#define HSA_IOLINK_TYPE_MIPI 4 +#define HSA_IOLINK_TYPE_QPI_1_1 5 +#define HSA_IOLINK_TYPE_RESERVED1 6 +#define HSA_IOLINK_TYPE_RESERVED2 7 +#define HSA_IOLINK_TYPE_RAPID_IO 8 +#define HSA_IOLINK_TYPE_INFINIBAND 9 +#define HSA_IOLINK_TYPE_RESERVED3 10 +#define HSA_IOLINK_TYPE_XGMI 11 +#define HSA_IOLINK_TYPE_XGOP 12 +#define HSA_IOLINK_TYPE_GZ 13 +#define HSA_IOLINK_TYPE_ETHERNET_RDMA 14 +#define HSA_IOLINK_TYPE_RDMA_OTHER 15 +#define HSA_IOLINK_TYPE_OTHER 16 + +/* Flag bits in IO link properties (matches CRAT flags, excluding the + * bi-directional flag, which is not offially part of the CRAT spec, and + * only used internally in KFD) + */ +#define HSA_IOLINK_FLAGS_ENABLED (1 << 0) +#define HSA_IOLINK_FLAGS_NON_COHERENT (1 << 1) +#define HSA_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +#define HSA_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +#define HSA_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +#define HSA_IOLINK_FLAGS_RESERVED 0xffffffe0 + +#endif -- cgit v1.2.3 From 2925748eadc33cba3bded7b69475a1b002b124ac Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 17 Nov 2021 13:22:53 +0000 Subject: firmware: cs_dsp: Add version checks on coefficient loading The firmware coefficient files contain version information that is currently ignored by the cs_dsp code. This information specifies which version of the firmware the coefficient were generated for. Add a check into the code which prints a warning in the case the coefficient and firmware differ in version, in many cases this will be ok but it is not always, so best to let the user know there is a potential issue. Co-authored-by: Simon Trimmer Signed-off-by: Simon Trimmer Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20211117132300.1290-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 3a54b1afc48f..ce54705e2bec 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -54,12 +54,14 @@ struct cs_dsp_region { * struct cs_dsp_alg_region - Describes a logical algorithm region in DSP address space * @list: List node for internal use * @alg: Algorithm id + * @ver: Expected algorithm version * @type: Memory region type * @base: Address of region */ struct cs_dsp_alg_region { struct list_head list; unsigned int alg; + unsigned int ver; int type; unsigned int base; }; -- cgit v1.2.3 From 14055b5a3a23204c4702ae5d3f2a819ee081ce33 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 17 Nov 2021 13:22:54 +0000 Subject: firmware: cs_dsp: Add pre_run callback The code already has a post_run callback, add a matching pre_run callback to the client_ops that is called before execution is started. This callback provides a convenient place for the client code to set DSP controls or hardware that requires configuration before the DSP core actually starts execution. Note that placing this callback before cs_dsp_coeff_sync_controls is important to ensure that any control values are then correctly synced out to the chip. Co-authored-by: Simon Trimmer Signed-off-by: Simon Trimmer Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20211117132300.1290-4-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index ce54705e2bec..0bf849baeaa5 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -187,7 +187,8 @@ struct cs_dsp { * struct cs_dsp_client_ops - client callbacks * @control_add: Called under the pwr_lock when a control is created * @control_remove: Called under the pwr_lock when a control is destroyed - * @post_run: Called under the pwr_lock by cs_dsp_run() + * @pre_run: Called under the pwr_lock by cs_dsp_run() before the core is started + * @post_run: Called under the pwr_lock by cs_dsp_run() after the core is started * @post_stop: Called under the pwr_lock by cs_dsp_stop() * @watchdog_expired: Called when a watchdog expiry is detected * @@ -197,6 +198,7 @@ struct cs_dsp { struct cs_dsp_client_ops { int (*control_add)(struct cs_dsp_coeff_ctl *ctl); void (*control_remove)(struct cs_dsp_coeff_ctl *ctl); + int (*pre_run)(struct cs_dsp *dsp); int (*post_run)(struct cs_dsp *dsp); void (*post_stop)(struct cs_dsp *dsp); void (*watchdog_expired)(struct cs_dsp *dsp); -- cgit v1.2.3 From b329b3d39497a9fdb175d7e4fd77ae7170d5d26c Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 17 Nov 2021 13:22:58 +0000 Subject: firmware: cs_dsp: Clarify some kernel doc comments Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20211117132300.1290-8-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 0bf849baeaa5..1ad1b173417a 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -76,8 +76,8 @@ struct cs_dsp_alg_region { * @enabled: Flag indicating whether control is enabled * @list: List node for internal use * @cache: Cached value of the control - * @offset: Offset of control within alg_region - * @len: Length of the cached value + * @offset: Offset of control within alg_region in words + * @len: Length of the cached value in bytes * @set: Flag indicating the value has been written by the user * @flags: Bitfield of WMFW_CTL_FLAG_ control flags defined in wmfw.h * @type: One of the WMFW_CTL_TYPE_ control types defined in wmfw.h -- cgit v1.2.3 From f444da38ac924748de696c393327a44c4b8d727e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 17 Nov 2021 13:22:59 +0000 Subject: firmware: cs_dsp: Add offset to cs_dsp read/write Provide a mechanism to access only part of a control through the cs_dsp interface. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20211117132300.1290-9-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 1ad1b173417a..38b4da3ddfe4 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -232,8 +232,10 @@ void cs_dsp_init_debugfs(struct cs_dsp *dsp, struct dentry *debugfs_root); void cs_dsp_cleanup_debugfs(struct cs_dsp *dsp); int cs_dsp_coeff_write_acked_control(struct cs_dsp_coeff_ctl *ctl, unsigned int event_id); -int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl, const void *buf, size_t len); -int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl, void *buf, size_t len); +int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off, + const void *buf, size_t len); +int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off, + void *buf, size_t len); struct cs_dsp_coeff_ctl *cs_dsp_get_ctl(struct cs_dsp *dsp, const char *name, int type, unsigned int alg); -- cgit v1.2.3 From 5c903f64ce97172d63f7591cfa9e37cba58867b2 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 17 Nov 2021 13:23:00 +0000 Subject: firmware: cs_dsp: Allow creation of event controls Some firmwares contain controls intended to convey firmware state back to the host. Whilst more infrastructure will probably be needed for these in time, as a first step allow creation of the controls, so said firmwares arn't completely rejected. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20211117132300.1290-10-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/wmfw.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firmware/cirrus/wmfw.h b/include/linux/firmware/cirrus/wmfw.h index a19bf7c6fc8b..74e5a4f6c13a 100644 --- a/include/linux/firmware/cirrus/wmfw.h +++ b/include/linux/firmware/cirrus/wmfw.h @@ -29,6 +29,7 @@ #define WMFW_CTL_TYPE_ACKED 0x1000 /* acked control */ #define WMFW_CTL_TYPE_HOSTEVENT 0x1001 /* event control */ #define WMFW_CTL_TYPE_HOST_BUFFER 0x1002 /* host buffer pointer */ +#define WMFW_CTL_TYPE_FWEVENT 0x1004 /* firmware event control */ struct wmfw_header { char magic[4]; -- cgit v1.2.3 From f58a435311672305d8747f40e35235f7ed64ae69 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 5 Nov 2021 14:33:41 -0400 Subject: drm/dp, drm/i915: Add support for VESA backlights using PWM for brightness control Now that we've added support to i915 for controlling panel backlights that need PWM to be enabled/disabled, let's finalize this and add support for controlling brightness levels via PWM as well. This should hopefully put us towards the path of supporting _ALL_ backlights via VESA's DPCD interface which would allow us to finally start trusting the DPCD again. Note however that we still don't enable using this by default on i915 when it's not needed, primarily because I haven't yet had a chance to confirm if it's safe to do this on the one machine in Intel's CI that had an issue with this: samus-fi-bdw. I have done basic testing of this on other machines though, by manually patching i915 to force it into PWM-only mode on some of my laptops. v2: * Correct documentation (thanks Doug!) * Get rid of backlight caps Signed-off-by: Lyude Paul Reviewed-by: Doug Anderson Cc: Rajeev Nandan Cc: Satadru Pramanik Link: https://patchwork.freedesktop.org/patch/msgid/20211105183342.130810-5-lyude@redhat.com --- include/drm/drm_dp_helper.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index afdf7f4183f9..8b2ed4199284 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -1868,7 +1868,7 @@ drm_dp_sink_can_do_video_without_timing_msa(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) * * Note that currently this function will return %false for panels which support various DPCD * backlight features but which require the brightness be set through PWM, and don't support setting - * the brightness level via the DPCD. This is a TODO. + * the brightness level via the DPCD. * * Returns: %True if @edp_dpcd indicates that VESA backlight controls are supported, %false * otherwise @@ -1876,8 +1876,7 @@ drm_dp_sink_can_do_video_without_timing_msa(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) static inline bool drm_edp_backlight_supported(const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]) { - return (edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP) && - (edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP); + return !!(edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP); } /* @@ -2238,6 +2237,7 @@ drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk) * @max: The maximum backlight level that may be set * @lsb_reg_used: Do we also write values to the DP_EDP_BACKLIGHT_BRIGHTNESS_LSB register? * @aux_enable: Does the panel support the AUX enable cap? + * @aux_set: Does the panel support setting the brightness through AUX? * * This structure contains various data about an eDP backlight, which can be populated by using * drm_edp_backlight_init(). @@ -2249,6 +2249,7 @@ struct drm_edp_backlight_info { bool lsb_reg_used : 1; bool aux_enable : 1; + bool aux_set : 1; }; int -- cgit v1.2.3 From 268bb03856ed6c8511c31d08de0148782f50822f Mon Sep 17 00:00:00 2001 From: Thiago Rafael Becker Date: Wed, 17 Nov 2021 10:26:30 -0300 Subject: sunrpc: fix header include guard in trace header rpcgss.h include protection was protecting against the define for rpcrdma.h. Signed-off-by: Thiago Rafael Becker Signed-off-by: Trond Myklebust --- include/trace/events/rpcgss.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index 3ba63319af3c..c9048f3e471b 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -8,7 +8,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM rpcgss -#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_TRACE_RPCGSS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RPCGSS_H #include -- cgit v1.2.3 From 48b71a9e66c2eab60564b1b1c85f4928ed04e406 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 16 Nov 2021 23:27:32 +0800 Subject: NFC: add NCI_UNREG flag to eliminate the race There are two sites that calls queue_work() after the destroy_workqueue() and lead to possible UAF. The first site is nci_send_cmd(), which can happen after the nci_close_device as below nfcmrvl_nci_unregister_dev | nfc_genl_dev_up nci_close_device | flush_workqueue | del_timer_sync | nci_unregister_device | nfc_get_device destroy_workqueue | nfc_dev_up nfc_unregister_device | nci_dev_up device_del | nci_open_device | __nci_request | nci_send_cmd | queue_work !!! Another site is nci_cmd_timer, awaked by the nci_cmd_work from the nci_send_cmd. ... | ... nci_unregister_device | queue_work destroy_workqueue | nfc_unregister_device | ... device_del | nci_cmd_work | mod_timer | ... | nci_cmd_timer | queue_work !!! For the above two UAF, the root cause is that the nfc_dev_up can race between the nci_unregister_device routine. Therefore, this patch introduce NCI_UNREG flag to easily eliminate the possible race. In addition, the mutex_lock in nci_close_device can act as a barrier. Signed-off-by: Lin Ma Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Reviewed-by: Jakub Kicinski Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211116152732.19238-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski --- include/net/nfc/nci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index a964daedc17b..ea8595651c38 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -30,6 +30,7 @@ enum nci_flag { NCI_UP, NCI_DATA_EXCHANGE, NCI_DATA_EXCHANGE_TO, + NCI_UNREG, }; /* NCI device states */ -- cgit v1.2.3 From 8ff978b8b222bc9d51dd109a46b51026336c95d8 Mon Sep 17 00:00:00 2001 From: Riccardo Paolo Bestetti Date: Wed, 17 Nov 2021 10:00:11 +0100 Subject: ipv4/raw: support binding to nonlocal addresses Add support to inet v4 raw sockets for binding to nonlocal addresses through the IP_FREEBIND and IP_TRANSPARENT socket options, as well as the ipv4.ip_nonlocal_bind kernel parameter. Add helper function to inet_sock.h to check for bind address validity on the base of the address type and whether nonlocal address are enabled for the socket via any of the sockopts/sysctl, deduplicating checks in ipv4/ping.c, ipv4/af_inet.c, ipv6/af_inet6.c (for mapped v4->v6 addresses), and ipv4/raw.c. Add test cases with IP[V6]_FREEBIND verifying that both v4 and v6 raw sockets support binding to nonlocal addresses after the change. Add necessary support for the test cases to nettest. Signed-off-by: Riccardo Paolo Bestetti Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20211117090010.125393-1-pbl@bestov.io Signed-off-by: Jakub Kicinski --- include/net/inet_sock.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 9e1111f5915b..234d70ae5f4c 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -372,4 +372,16 @@ static inline bool inet_can_nonlocal_bind(struct net *net, inet->freebind || inet->transparent; } +static inline bool inet_addr_valid_or_nonlocal(struct net *net, + struct inet_sock *inet, + __be32 addr, + int addr_type) +{ + return inet_can_nonlocal_bind(net, inet) || + addr == htonl(INADDR_ANY) || + addr_type == RTN_LOCAL || + addr_type == RTN_MULTICAST || + addr_type == RTN_BROADCAST; +} + #endif /* _INET_SOCK_H */ -- cgit v1.2.3 From 357a18ad230f0867791b788d2b1d6f280f6f6e61 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 15 Nov 2021 16:50:27 +0000 Subject: KVM: Kill kvm_map_gfn() / kvm_unmap_gfn() and gfn_to_pfn_cache In commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status") I removed the only user of these functions because it was basically impossible to use them safely. There are two stages to the GFN->PFN mapping; first through the KVM memslots to a userspace HVA and then through the page tables to translate that HVA to an underlying PFN. Invalidations of the former were being handled correctly, but no attempt was made to use the MMU notifiers to invalidate the cache when the HVA->GFN mapping changed. As a prelude to reinventing the gfn_to_pfn_cache with more usable semantics, rip it out entirely and untangle the implementation of the unsafe kvm_vcpu_map()/kvm_vcpu_unmap() functions from it. All current users of kvm_vcpu_map() also look broken right now, and will be dealt with separately. They broadly fall into two classes: * Those which map, access the data and immediately unmap. This is mostly gratuitous and could just as well use the existing user HVA, and could probably benefit from a gfn_to_hva_cache as they do so. * Those which keep the mapping around for a longer time, perhaps even using the PFN directly from the guest. These will need to be converted to the new gfn_to_pfn_cache and then kvm_vcpu_map() can be removed too. Signed-off-by: David Woodhouse Message-Id: <20211115165030.7422-8-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 6 +----- include/linux/kvm_types.h | 7 ------- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 60a35d9fe259..eb625af4fc5e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -866,7 +866,7 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_accessed(kvm_pfn_t pfn); -void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache); +void kvm_release_pfn(kvm_pfn_t pfn, bool dirty); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); @@ -942,12 +942,8 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); -int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, - struct gfn_to_pfn_cache *cache, bool atomic); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); -int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, - struct gfn_to_pfn_cache *cache, bool dirty, bool atomic); unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 2237abb93ccd..234eab059839 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -53,13 +53,6 @@ struct gfn_to_hva_cache { struct kvm_memory_slot *memslot; }; -struct gfn_to_pfn_cache { - u64 generation; - gfn_t gfn; - kvm_pfn_t pfn; - bool dirty; -}; - #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE /* * Memory caches are used to preallocate memory ahead of various MMU flows, -- cgit v1.2.3 From f915b75bffb7257bd8d26376b8e1cc67771927f8 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 17 Nov 2021 15:56:52 +0800 Subject: page_pool: Revert "page_pool: disable dma mapping support..." This reverts commit d00e60ee54b12de945b8493cf18c1ada9e422514. As reported by Guillaume in [1]: Enabling LPAE always enables CONFIG_ARCH_DMA_ADDR_T_64BIT in 32-bit systems, which breaks the bootup proceess when a ethernet driver is using page pool with PP_FLAG_DMA_MAP flag. As we were hoping we had no active consumers for such system when we removed the dma mapping support, and LPAE seems like a common feature for 32 bits system, so revert it. 1. https://www.spinics.net/lists/netdev/msg779890.html Fixes: d00e60ee54b1 ("page_pool: disable dma mapping support for 32-bit arch with 64-bit DMA") Signed-off-by: Yunsheng Lin Reported-by: "kernelci.org bot" Tested-by: "kernelci.org bot" Acked-by: Jesper Dangaard Brouer Acked-by: Ilias Apalodimas Signed-off-by: David S. Miller --- include/linux/mm_types.h | 13 ++++++++++++- include/net/page_pool.h | 12 +++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bb8c6f5f19bc..c3a6e6209600 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -105,7 +105,18 @@ struct page { struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; - atomic_long_t pp_frag_count; + union { + /** + * dma_addr_upper: might require a 64-bit + * value on 32-bit architectures. + */ + unsigned long dma_addr_upper; + /** + * For frag page support, not supported in + * 32-bit architectures with 64-bit DMA. + */ + atomic_long_t pp_frag_count; + }; }; struct { /* slab, slob and slub */ union { diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 3855f069627f..a4082406a003 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -216,14 +216,24 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ + (sizeof(dma_addr_t) > sizeof(unsigned long)) + static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { - return page->dma_addr; + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + + return ret; } static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { page->dma_addr = addr; + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + page->dma_addr_upper = upper_32_bits(addr); } static inline void page_pool_set_frag_count(struct page *page, long nr) -- cgit v1.2.3 From df6160deb3debe6f964c16349f9431157ff67dda Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Nov 2021 17:57:29 -0800 Subject: tcp: add missing htmldocs for skb->ll_node and sk->defer_list Add missing entries to fix these "make htmldocs" warnings. ./include/linux/skbuff.h:953: warning: Function parameter or member 'll_node' not described in 'sk_buff' ./include/net/sock.h:540: warning: Function parameter or member 'defer_list' not described in 'sock' Fixes: f35f821935d8 ("tcp: defer skb freeing after socket lock is released") Signed-off-by: Eric Dumazet Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/net/sock.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b8b806512e16..100fd604fbc9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -627,6 +627,7 @@ typedef unsigned char *sk_buff_data_t; * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp * @list: queue head + * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in * fragmentation management diff --git a/include/net/sock.h b/include/net/sock.h index f09c0c4736c4..a79fc772324e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -292,6 +292,7 @@ struct bpf_local_storage; * @sk_pacing_shift: scaling factor for TCP Small Queues * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held + * @defer_list: head of llist storing skbs to be freed * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, -- cgit v1.2.3 From 0568c3bf3f34ad2f86e6b2dfaa0855aad9c1c562 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 18 Nov 2021 18:11:57 +0800 Subject: net: mscc: ocelot: add MAC table stream learn and lookup operations ocelot_mact_learn_streamdata() can be used in VSC9959 to overwrite an FDB entry with stream data. The stream data includes SFID and SSID which can be used for PSFP and FRER set. ocelot_mact_lookup() can be used to check if the given {DMAC, VID} FDB entry is exist, and also can retrieve the DEST_IDX and entry type for the FDB entry. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index fef3a36b0210..1d5ff11e4100 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -593,6 +593,19 @@ enum ocelot_sb_pool { OCELOT_SB_POOL_NUM, }; +/* MAC table entry types. + * ENTRYTYPE_NORMAL is subject to aging. + * ENTRYTYPE_LOCKED is not subject to aging. + * ENTRYTYPE_MACv4 is not subject to aging. For IPv4 multicast. + * ENTRYTYPE_MACv6 is not subject to aging. For IPv6 multicast. + */ +enum macaccess_entry_type { + ENTRYTYPE_NORMAL = 0, + ENTRYTYPE_LOCKED, + ENTRYTYPE_MACv4, + ENTRYTYPE_MACv6, +}; + #define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION BIT(0) #define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP BIT(1) @@ -870,6 +883,15 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port, bool tx_pause, bool rx_pause, unsigned long quirks); +int ocelot_mact_lookup(struct ocelot *ocelot, int *dst_idx, + const unsigned char mac[ETH_ALEN], + unsigned int vid, enum macaccess_entry_type *type); +int ocelot_mact_learn_streamdata(struct ocelot *ocelot, int dst_idx, + const unsigned char mac[ETH_ALEN], + unsigned int vid, + enum macaccess_entry_type type, + int sfid, int ssid); + #if IS_ENABLED(CONFIG_BRIDGE_MRP) int ocelot_mrp_add(struct ocelot *ocelot, int port, const struct switchdev_obj_mrp *mrp); -- cgit v1.2.3 From 23e2c506ad6c588b469e3d06cc20299434440d02 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 18 Nov 2021 18:11:59 +0800 Subject: net: mscc: ocelot: add gate and police action offload to PSFP PSFP support gate and police action. This patch add the gate and police action to flower parse action, check chain ID to determine which block to offload. Adding psfp callback functions to add, delete and update gate and police in PSFP table if hardware supports it. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 5 +++++ include/soc/mscc/ocelot_vcap.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 1d5ff11e4100..e9985ace59c0 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -555,6 +555,11 @@ struct ocelot_ops { u16 (*wm_enc)(u16 value); u16 (*wm_dec)(u16 value); void (*wm_stat)(u32 val, u32 *inuse, u32 *maxuse); + void (*psfp_init)(struct ocelot *ocelot); + int (*psfp_filter_add)(struct ocelot *ocelot, struct flow_cls_offload *f); + int (*psfp_filter_del)(struct ocelot *ocelot, struct flow_cls_offload *f); + int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f, + struct flow_stats *stats); }; struct ocelot_vcap_block { diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index eeb1142aa1b1..9cca2f8e61a2 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -656,6 +656,7 @@ enum ocelot_vcap_filter_type { OCELOT_VCAP_FILTER_DUMMY, OCELOT_VCAP_FILTER_PAG, OCELOT_VCAP_FILTER_OFFLOAD, + OCELOT_PSFP_FILTER_OFFLOAD, }; struct ocelot_vcap_id { -- cgit v1.2.3 From 7d4b564d6adde3167dd015f7dbb7aee1d7a4294e Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 18 Nov 2021 18:12:00 +0800 Subject: net: dsa: felix: support psfp filter on vsc9959 VSC9959 supports Per-Stream Filtering and Policing(PSFP) that complies with the IEEE 802.1Qci standard. The stream is identified by Null stream identification(DMAC and VLAN ID) defined in IEEE802.1CB. For PSFP, four tables need to be set up: stream table, stream filter table, stream gate table, and flow meter table. Identify the stream by parsing the tc flower keys and add it to the stream table. The stream filter table is automatically maintained, and its index is determined by SGID(flow gate index) and FMID(flow meter index). Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 8 ++++++++ include/soc/mscc/ocelot_ana.h | 10 ++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index e9985ace59c0..5ea72d274d7f 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -586,6 +586,12 @@ enum ocelot_port_tag_config { OCELOT_PORT_TAG_TRUNK = 3, }; +struct ocelot_psfp_list { + struct list_head stream_list; + struct list_head sfi_list; + struct list_head sgi_list; +}; + enum ocelot_sb { OCELOT_SB_BUF, OCELOT_SB_REF, @@ -687,6 +693,8 @@ struct ocelot { struct ocelot_vcap_block block[3]; struct vcap_props *vcap; + struct ocelot_psfp_list psfp; + /* Workqueue to check statistics for overflow with its lock */ struct mutex stats_lock; u64 *stats; diff --git a/include/soc/mscc/ocelot_ana.h b/include/soc/mscc/ocelot_ana.h index 1669481d9779..67e0ae05a5ab 100644 --- a/include/soc/mscc/ocelot_ana.h +++ b/include/soc/mscc/ocelot_ana.h @@ -227,6 +227,11 @@ #define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD(x) ((x) & GENMASK(1, 0)) #define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD_M GENMASK(1, 0) +#define SFIDACCESS_CMD_IDLE 0 +#define SFIDACCESS_CMD_READ 1 +#define SFIDACCESS_CMD_WRITE 2 +#define SFIDACCESS_CMD_INIT 3 + #define ANA_TABLES_SFIDTIDX_SGID_VALID BIT(26) #define ANA_TABLES_SFIDTIDX_SGID(x) (((x) << 18) & GENMASK(25, 18)) #define ANA_TABLES_SFIDTIDX_SGID_M GENMASK(25, 18) @@ -255,6 +260,11 @@ #define ANA_SG_CONFIG_REG_3_INIT_IPS(x) (((x) << 21) & GENMASK(24, 21)) #define ANA_SG_CONFIG_REG_3_INIT_IPS_M GENMASK(24, 21) #define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x) (((x) & GENMASK(24, 21)) >> 21) +#define ANA_SG_CONFIG_REG_3_IPV_VALID BIT(24) +#define ANA_SG_CONFIG_REG_3_IPV_INVALID(x) (((x) << 24) & GENMASK(24, 24)) +#define ANA_SG_CONFIG_REG_3_INIT_IPV(x) (((x) << 21) & GENMASK(23, 21)) +#define ANA_SG_CONFIG_REG_3_INIT_IPV_M GENMASK(23, 21) +#define ANA_SG_CONFIG_REG_3_INIT_IPV_X(x) (((x) & GENMASK(23, 21)) >> 21) #define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE BIT(25) #define ANA_SG_GCL_GS_CONFIG_RSZ 0x4 -- cgit v1.2.3 From 77043c37096d4753b9f40e51445f31eb9dc40295 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 18 Nov 2021 18:12:02 +0800 Subject: net: mscc: ocelot: use index to set vcap policer Policer was previously automatically assigned from the highest index to the lowest index from policer pool. But police action of tc flower now uses index to set an police entry. This patch uses the police index to set vcap policers, so that one policer can be shared by multiple rules. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 5ea72d274d7f..2a41685b5c7d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -562,10 +562,17 @@ struct ocelot_ops { struct flow_stats *stats); }; +struct ocelot_vcap_policer { + struct list_head pol_list; + u16 base; + u16 max; + u16 base2; + u16 max2; +}; + struct ocelot_vcap_block { struct list_head rules; int count; - int pol_lpr; }; struct ocelot_bridge_vlan { @@ -691,6 +698,7 @@ struct ocelot { struct list_head dummy_rules; struct ocelot_vcap_block block[3]; + struct ocelot_vcap_policer vcap_pol; struct vcap_props *vcap; struct ocelot_psfp_list psfp; @@ -905,6 +913,10 @@ int ocelot_mact_learn_streamdata(struct ocelot *ocelot, int dst_idx, enum macaccess_entry_type type, int sfid, int ssid); +int ocelot_vcap_policer_add(struct ocelot *ocelot, u32 pol_ix, + struct ocelot_policer *pol); +int ocelot_vcap_policer_del(struct ocelot *ocelot, u32 pol_ix); + #if IS_ENABLED(CONFIG_BRIDGE_MRP) int ocelot_mrp_add(struct ocelot *ocelot, int port, const struct switchdev_obj_mrp *mrp); -- cgit v1.2.3 From a7e13edf37beee65f2c2ec60c42e5fb89a2958ce Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 18 Nov 2021 18:12:04 +0800 Subject: net: dsa: felix: restrict psfp rules on ingress port PSFP rules take effect on the streams from any port of VSC9959 switch. This patch use ingress port to limit the rule only active on this port. Each stream can only match two ingress source ports in VSC9959. Streams from lowest port gets the configuration of SFID pointed by MAC Table lookup and streams from highest port gets the configuration of (SFID+1) pointed by MAC Table lookup. This patch defines the PSFP rule on highest port as dummy rule, which means that it does not modify the MAC table. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- include/soc/mscc/ocelot.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 2a41685b5c7d..89d17629efe5 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -556,7 +556,8 @@ struct ocelot_ops { u16 (*wm_dec)(u16 value); void (*wm_stat)(u32 val, u32 *inuse, u32 *maxuse); void (*psfp_init)(struct ocelot *ocelot); - int (*psfp_filter_add)(struct ocelot *ocelot, struct flow_cls_offload *f); + int (*psfp_filter_add)(struct ocelot *ocelot, int port, + struct flow_cls_offload *f); int (*psfp_filter_del)(struct ocelot *ocelot, struct flow_cls_offload *f); int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f, struct flow_stats *stats); -- cgit v1.2.3 From 6966df483d7b5b218aeb0e13e7e334a8fc3c1744 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 18 Nov 2021 13:49:51 +0200 Subject: regulator: Update protection IRQ helper docs The documentation of IRQ notification helper had still references to first RFC implementation which called BUG() while trying to protect the hardware. Behaviour was improved as calling the BUG() was not a proper solution. Current implementation attempts to call poweroff if handling of potentially damaging error notification fails. Update the documentation to reflect the actual behaviour. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/0c9cc4bcf20c3da66fd5a85c97ee4288e5727538.1637233864.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bd7a73db2e66..54cf566616ae 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -499,7 +499,8 @@ struct regulator_irq_data { * best to shut-down regulator(s) or reboot the SOC if error * handling is repeatedly failing. If fatal_cnt is given the IRQ * handling is aborted if it fails for fatal_cnt times and die() - * callback (if populated) or BUG() is called to try to prevent + * callback (if populated) is called. If die() is not populated + * poweroff for the system is attempted in order to prevent any * further damage. * @reread_ms: The time which is waited before attempting to re-read status * at the worker if IC reading fails. Immediate re-read is done @@ -516,11 +517,12 @@ struct regulator_irq_data { * @data: Driver private data pointer which will be passed as such to * the renable, map_event and die callbacks in regulator_irq_data. * @die: Protection callback. If IC status reading or recovery actions - * fail fatal_cnt times this callback or BUG() is called. This - * callback should implement a final protection attempt like - * disabling the regulator. If protection succeeded this may - * return 0. If anything else is returned the core assumes final - * protection failed and calls BUG() as a last resort. + * fail fatal_cnt times this callback is called or system is + * powered off. This callback should implement a final protection + * attempt like disabling the regulator. If protection succeeded + * die() may return 0. If anything else is returned the core + * assumes final protection failed and attempts to perform a + * poweroff as a last resort. * @map_event: Driver callback to map IRQ status into regulator devices with * events / errors. NOTE: callback MUST initialize both the * errors and notifs for all rdevs which it signals having -- cgit v1.2.3 From e6feefa541f309afed8aa54431681261bc57bcde Mon Sep 17 00:00:00 2001 From: YC Hung Date: Thu, 18 Nov 2021 12:07:43 +0200 Subject: ASoC: SOF: tokens: add token for Mediatek AFE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the definition for Mediatek audio front end(AFE) tokens,include AFE sampling rate, channels, and format. Signed-off-by: YC Hung Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Reviewed-by: Daniel Baluta Signed-off-by: Daniel Baluta Link: https://lore.kernel.org/r/20211118100749.54628-3-daniel.baluta@oss.nxp.com Signed-off-by: Mark Brown --- include/uapi/sound/sof/tokens.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h index 02b71a8deea4..b72fa385bebf 100644 --- a/include/uapi/sound/sof/tokens.h +++ b/include/uapi/sound/sof/tokens.h @@ -140,4 +140,9 @@ #define SOF_TKN_INTEL_HDA_RATE 1500 #define SOF_TKN_INTEL_HDA_CH 1501 +/* AFE */ +#define SOF_TKN_MEDIATEK_AFE_RATE 1600 +#define SOF_TKN_MEDIATEK_AFE_CH 1601 +#define SOF_TKN_MEDIATEK_AFE_FORMAT 1602 + #endif -- cgit v1.2.3 From b72bfcffcfc11858a8fc92998733372606db485e Mon Sep 17 00:00:00 2001 From: YC Hung Date: Thu, 18 Nov 2021 12:07:44 +0200 Subject: ASoC: SOF: topology: Add support for Mediatek AFE DAI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new sof dai and config to pass topology file configuration to SOF firmware running on Mediatek platform DSP core. Add mediatek audio front end(AFE) to the list of supported sof_dais Signed-off-by: YC Hung Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Reviewed-by: Daniel Baluta Signed-off-by: Daniel Baluta Link: https://lore.kernel.org/r/20211118100749.54628-4-daniel.baluta@oss.nxp.com Signed-off-by: Mark Brown --- include/sound/sof/dai-mediatek.h | 23 +++++++++++++++++++++++ include/sound/sof/dai.h | 3 +++ 2 files changed, 26 insertions(+) create mode 100644 include/sound/sof/dai-mediatek.h (limited to 'include') diff --git a/include/sound/sof/dai-mediatek.h b/include/sound/sof/dai-mediatek.h new file mode 100644 index 000000000000..62dd4720558d --- /dev/null +++ b/include/sound/sof/dai-mediatek.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * Copyright(c) 2021 Mediatek Corporation. All rights reserved. + * + * Author: Bo Pan + */ + +#ifndef __INCLUDE_SOUND_SOF_DAI_MEDIATEK_H__ +#define __INCLUDE_SOUND_SOF_DAI_MEDIATEK_H__ + +#include + +struct sof_ipc_dai_mtk_afe_params { + struct sof_ipc_hdr hdr; + u32 channels; + u32 rate; + u32 format; + u32 stream_id; + u32 reserved[4]; /* reserve for future */ +} __packed; + +#endif + diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 3782127a7095..5132bc60f54b 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -13,6 +13,7 @@ #include #include #include +#include /* * DAI Configuration. @@ -70,6 +71,7 @@ enum sof_ipc_dai_type { SOF_DAI_AMD_BT, /**< AMD ACP BT*/ SOF_DAI_AMD_SP, /**< AMD ACP SP */ SOF_DAI_AMD_DMIC, /**< AMD ACP DMIC */ + SOF_DAI_MEDIATEK_AFE, /**< Mediatek AFE */ }; /* general purpose DAI configuration */ @@ -97,6 +99,7 @@ struct sof_ipc_dai_config { struct sof_ipc_dai_acp_params acpbt; struct sof_ipc_dai_acp_params acpsp; struct sof_ipc_dai_acp_params acpdmic; + struct sof_ipc_dai_mtk_afe_params afe; }; } __packed; -- cgit v1.2.3 From 8b6e88555971eac384b89fb0bd6c72ee4e1e6a6a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 18 Nov 2021 13:48:47 +0200 Subject: regulator: rohm-regulator: add helper for restricted voltage setting Few ROHM PMICs have regulators where voltage setting can be done only when regulator is disabled. Add helper for those PMICs. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/6f51871e9fea611d133b5dd2560f4a7ee1ede9cd.1637233864.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/mfd/rohm-generic.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 35b392a0d73a..35c5866f48b7 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -80,6 +80,8 @@ int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs, const struct regulator_desc *desc, struct regmap *regmap); +int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev, + unsigned int sel); #else static inline int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs, struct device_node *np, @@ -88,6 +90,11 @@ static inline int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dv { return 0; } +static int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev, + unsigned int sel) +{ + return 0; +} #endif #endif -- cgit v1.2.3 From 92b1348277f8893671e5354adde64fe3cf462821 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 18 Nov 2021 13:49:30 +0200 Subject: regulator: Add units to limit documentation The documentation for limits used at protection level setting did not mention the units. Fix the units in documentation to match values passed in from device-tree (uV, uA, Kelvin) to avoid confusion. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/111114aca991e41e49a32f89b74e95285f07c1e3.1637233864.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bd7a73db2e66..1cb8071fee34 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -101,11 +101,13 @@ enum regulator_detection_severity { * is requested. * @set_over_voltage_protection: Support enabling of and setting limits for over * voltage situation detection. Detection can be configured for same - * severities as over current protection. + * severities as over current protection. Units of uV. * @set_under_voltage_protection: Support enabling of and setting limits for - * under situation detection. + * under voltage situation detection. Detection can be configured for same + * severities as over current protection. Units of uV. * @set_thermal_protection: Support enabling of and setting limits for over - * temperature situation detection. + * temperature situation detection.Detection can be configured for same + * severities as over current protection. Units of degree Kelvin. * * @set_active_discharge: Set active discharge enable/disable of regulators. * -- cgit v1.2.3 From 418e0a3551bbef5b221705b0e5b8412cdc0afd39 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Nov 2021 14:42:24 +0200 Subject: lib/string_helpers: Introduce kasprintf_strarray() We have a few users already that basically want to have array of sequential strings to be allocated and filled. Provide a helper for them (basically adjusted version from gpio-mockup.c). Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij --- include/linux/string_helpers.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 4ba39e1403b2..f67a94013c87 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -100,6 +100,7 @@ char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); char *kstrdup_quotable_file(struct file *file, gfp_t gfp); +char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n); void kfree_strarray(char **array, size_t n); #endif -- cgit v1.2.3 From acdb89b6c87a2d7b5c48a82756e6f5c6f599f60a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Nov 2021 14:42:25 +0200 Subject: lib/string_helpers: Introduce managed variant of kasprintf_strarray() Some of the users want to have easy way to allocate array of strings that will be automatically cleaned when associated device is gone. Introduce managed variant of kasprintf_strarray() for such use cases. Signed-off-by: Andy Shevchenko --- include/linux/string_helpers.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index f67a94013c87..7a22921c9db7 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -7,6 +7,7 @@ #include #include +struct device; struct file; struct task_struct; @@ -103,4 +104,6 @@ char *kstrdup_quotable_file(struct file *file, gfp_t gfp); char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n); void kfree_strarray(char **array, size_t n); +char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n); + #endif -- cgit v1.2.3 From 57bdeef4716689d9b0e3571034d65cf420f6efcd Mon Sep 17 00:00:00 2001 From: Naveen Naidu Date: Thu, 18 Nov 2021 19:33:11 +0530 Subject: PCI: Add PCI_ERROR_RESPONSE and related definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A config or MMIO read from a PCI device that doesn't exist or doesn't respond causes a PCI error. There's no real data to return to satisfy the CPU read, so most hardware fabricates ~0 data. Add a PCI_ERROR_RESPONSE definition for that and use it where appropriate to make these checks consistent and easier to find. Also add helper definitions PCI_SET_ERROR_RESPONSE() and PCI_POSSIBLE_ERROR() to make the code more readable. Suggested-by: Bjorn Helgaas Link: https://lore.kernel.org/r/55563bf4dfc5d3fdc96695373c659d099bf175b1.1637243717.git.naveennaidu479@gmail.com Signed-off-by: Naveen Naidu Signed-off-by: Bjorn Helgaas Reviewed-by: Pali Rohár --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 18a75c8e615c..0ce26850470e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -154,6 +154,15 @@ enum pci_interrupt_pin { /* The number of legacy PCI INTx interrupts */ #define PCI_NUM_INTX 4 +/* + * Reading from a device that doesn't respond typically returns ~0. A + * successful read from a device may also return ~0, so you need additional + * information to reliably identify errors. + */ +#define PCI_ERROR_RESPONSE (~0ULL) +#define PCI_SET_ERROR_RESPONSE(val) (*(val) = ((typeof(*(val))) PCI_ERROR_RESPONSE)) +#define PCI_POSSIBLE_ERROR(val) ((val) == ((typeof(val)) PCI_ERROR_RESPONSE)) + /* * pci_power_t values must match the bits in the Capabilities PME_Support * and Control/Status PowerState fields in the Power Management capability. -- cgit v1.2.3 From c035713998700e8843c7d087f55bce3c54c0e3ec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 5 Nov 2021 10:19:05 -0400 Subject: mm: Add functions to zero portions of a folio These functions are wrappers around zero_user_segments(), which means that zero_user_segments() can now be called for compound pages even when CONFIG_TRANSPARENT_HUGEPAGE is disabled. Use 'xend' as the name of the parameter to indicate that this is an excluded end, not the more usual included end. Excluding the end makes more sense to the callers, but can cause confusion to readers who are more used to seeing included ends. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- include/linux/highmem.h | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index c944b3b70ee7..39bb9b47fa9c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -230,10 +230,10 @@ static inline void tag_clear_highpage(struct page *page) * If we pass in a base or tail page, we can zero up to PAGE_SIZE. * If we pass in a head page, we can zero up to the size of the compound page. */ -#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +#ifdef CONFIG_HIGHMEM void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2); -#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ +#else static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) @@ -253,7 +253,7 @@ static inline void zero_user_segments(struct page *page, for (i = 0; i < compound_nr(page); i++) flush_dcache_page(page + i); } -#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ +#endif static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) @@ -363,4 +363,42 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) kunmap_local(addr); } +/** + * folio_zero_segments() - Zero two byte ranges in a folio. + * @folio: The folio to write to. + * @start1: The first byte to zero. + * @xend1: One more than the last byte in the first range. + * @start2: The first byte to zero in the second range. + * @xend2: One more than the last byte in the second range. + */ +static inline void folio_zero_segments(struct folio *folio, + size_t start1, size_t xend1, size_t start2, size_t xend2) +{ + zero_user_segments(&folio->page, start1, xend1, start2, xend2); +} + +/** + * folio_zero_segment() - Zero a byte range in a folio. + * @folio: The folio to write to. + * @start: The first byte to zero. + * @xend: One more than the last byte to zero. + */ +static inline void folio_zero_segment(struct folio *folio, + size_t start, size_t xend) +{ + zero_user_segments(&folio->page, start, xend, 0, 0); +} + +/** + * folio_zero_range() - Zero a byte range in a folio. + * @folio: The folio to write to. + * @start: The first byte to zero. + * @length: The number of bytes to zero. + */ +static inline void folio_zero_range(struct folio *folio, + size_t start, size_t length) +{ + zero_user_segments(&folio->page, start, start + length, 0, 0); +} + #endif /* _LINUX_HIGHMEM_H */ -- cgit v1.2.3 From 2475fcfbe4e383d586c5a58711e436d83a2bdfe6 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 8 Nov 2021 21:44:41 +0800 Subject: dt-bindings: power: rpmpd: Add QCM2290 support Add compatible and constants for the power domains exposed by the QCM2290 RPM. Signed-off-by: Shawn Guo Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211108134442.30051-3-shawn.guo@linaro.org --- include/dt-bindings/power/qcom-rpmpd.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index 960f7976a807..340b0ffe5eb8 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -219,6 +219,16 @@ #define SM6115_VDD_LPI_CX 6 #define SM6115_VDD_LPI_MX 7 +/* QCM2290 Power Domains */ +#define QCM2290_VDDCX 0 +#define QCM2290_VDDCX_AO 1 +#define QCM2290_VDDCX_VFL 2 +#define QCM2290_VDDMX 3 +#define QCM2290_VDDMX_AO 4 +#define QCM2290_VDDMX_VFL 5 +#define QCM2290_VDD_LPI_CX 6 +#define QCM2290_VDD_LPI_MX 7 + /* RPM SMD Power Domain performance levels */ #define RPM_SMD_LEVEL_RETENTION 16 #define RPM_SMD_LEVEL_RETENTION_PLUS 32 -- cgit v1.2.3 From 0a84486d6c1da1c2738544d8fc1b07b1d3ce046f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 16 Nov 2021 14:31:15 -0800 Subject: scsi: core: Remove Scsi_Host.shost_dev_attr_groups Simplify the scsi_host_alloc() implementation by setting the shost_class .dev_groups member instead of copying all host attribute group pointers into the shost_dev_attr_groups[] array. Link: https://lore.kernel.org/r/20211116223115.2103031-1-bvanassche@acm.org Cc: Steffen Maier Cc: Damien Le Moal Suggested-by: Benjamin Block Reviewed-by: Damien Le Moal Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_host.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index ebe059badba0..72e1a347baa6 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -691,12 +691,6 @@ struct Scsi_Host { /* ldm bits */ struct device shost_gendev, shost_dev; - /* - * The array size 3 provides space for one attribute group defined by - * the SCSI core, one attribute group defined by the SCSI LLD and one - * terminating NULL pointer. - */ - const struct attribute_group *shost_dev_attr_groups[3]; /* * Points to the transport data (if any) which is allocated -- cgit v1.2.3 From bc2dfc02836b1133d1bf4d22aa13d48ac98eabef Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Oct 2021 11:10:50 +0200 Subject: cfg80211: implement APIs for dedicated radar detection HW If a dedicated (off-channel) radar detection hardware (chain) is available in the hardware/driver, allow this to be used by calling the NL80211_CMD_RADAR_DETECT command with a new flag attribute requesting off-channel radar detection is used. Offchannel CAC (channel availability check) avoids the CAC downtime when switching to a radar channel or when turning on the AP. Drivers advertise support for this using the new feature flag NL80211_EXT_FEATURE_RADAR_OFFCHAN. Tested-by: Evelyn Tsai Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/7468e291ef5d05d692c1738d25b8f778d8ea5c3f.1634979655.git.lorenzo@kernel.org Link: https://lore.kernel.org/r/1e60e60fef00e14401adae81c3d49f3e5f307537.1634979655.git.lorenzo@kernel.org Link: https://lore.kernel.org/r/85fa50f57fc3adb2934c8d9ca0be30394de6b7e8.1634979655.git.lorenzo@kernel.org Link: https://lore.kernel.org/r/4b6c08671ad59aae0ac46fc94c02f31b1610eb72.1634979655.git.lorenzo@kernel.org Link: https://lore.kernel.org/r/241849ccaf2c228873c6f8495bf87b19159ba458.1634979655.git.lorenzo@kernel.org [remove offchan_mutex, fix cfg80211_stop_offchan_radar_detection(), remove gfp_t argument, fix documentation, fix tracing] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 25 +++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 13 +++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 423f97b982ff..db8866d42a4b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4072,6 +4072,15 @@ struct mgmt_frame_regs { * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use * those to decrypt (Re)Association Request and encrypt (Re)Association * Response frame. + * + * @set_radar_offchan: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Offchannel radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable offchannel CAC/radar detection. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4404,6 +4413,8 @@ struct cfg80211_ops { struct cfg80211_color_change_settings *params); int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_fils_aad *fils_aad); + int (*set_radar_offchan)(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef); }; /* @@ -7633,6 +7644,20 @@ void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp); +/** + * cfg80211_offchan_cac_event - Channel Availability Check (CAC) offchan event + * @wiphy: the wiphy + * @chandef: chandef for the current channel + * @event: type of event + * + * This function is called when a Channel Availability Check (CAC) is finished, + * started or aborted by a offchannel dedicated chain. + * + * Note that this acquires the wiphy lock. + */ +void cfg80211_offchan_cac_event(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 61cab81e920d..3e734826792f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2639,6 +2639,13 @@ enum nl80211_commands { * Mandatory parameter for the transmitting interface to enable MBSSID. * Optional for the non-transmitting interfaces. * + * @NL80211_ATTR_RADAR_OFFCHAN: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Offchannel radar/CAC detection allows to avoid the CAC downtime + * switching on a different channel during CAC detection on the selected + * radar channel. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3145,6 +3152,8 @@ enum nl80211_attrs { NL80211_ATTR_MBSSID_CONFIG, NL80211_ATTR_MBSSID_ELEMS, + NL80211_ATTR_RADAR_OFFCHAN, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -6051,6 +6060,9 @@ enum nl80211_feature_flags { * frames. Userspace has to share FILS AAD details to the driver by using * @NL80211_CMD_SET_FILS_AAD. * + * @NL80211_EXT_FEATURE_RADAR_OFFCHAN: Device supports offchannel radar/CAC + * detection. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6117,6 +6129,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, NL80211_EXT_FEATURE_BSS_COLOR, NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, + NL80211_EXT_FEATURE_RADAR_OFFCHAN, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 237337c230b94e78a5a0f88d1705259ab543fc40 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Oct 2021 11:10:51 +0200 Subject: mac80211: introduce set_radar_offchan callback Similar to cfg80211, introduce set_radar_offchan callback in mac80211_ops in order to configure a dedicated offchannel chain available on some hw (e.g. mt7915) to perform offchannel CAC detection and avoid tx/rx downtime. Tested-by: Evelyn Tsai Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/201110606d4f3a7dfdf31440e351f2e2c375d4f0.1634979655.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dd757f0987b0..775dbb982654 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3944,6 +3944,14 @@ struct ieee80211_prep_tx_info { * twt structure. * @twt_teardown_request: Update the hw with TWT teardown request received * from the peer. + * @set_radar_offchan: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Offchannel radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable offchannel CAC/radar detection. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4272,6 +4280,8 @@ struct ieee80211_ops { struct ieee80211_twt_setup *twt); void (*twt_teardown_request)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u8 flowid); + int (*set_radar_offchan)(struct ieee80211_hw *hw, + struct cfg80211_chan_def *chandef); }; /** -- cgit v1.2.3 From 1507b153198137dfa9cb4bec7c5dee07089ec3af Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 27 Oct 2021 11:03:42 +0200 Subject: cfg80211: move offchan_cac_event to a dedicated work In order to make cfg80211_offchan_cac_abort() (renamed from cfg80211_offchan_cac_event) callable in other contexts and without so much locking restrictions, make it trigger a new work instead of operating directly. Do some other renames while at it to clarify. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/6145c3d0f30400a568023f67981981d24c7c6133.1635325205.git.lorenzo@kernel.org [rewrite commit log] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index db8866d42a4b..362da9f6bf39 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7645,19 +7645,13 @@ void cfg80211_cac_event(struct net_device *netdev, enum nl80211_radar_event event, gfp_t gfp); /** - * cfg80211_offchan_cac_event - Channel Availability Check (CAC) offchan event + * cfg80211_offchan_cac_abort - Channel Availability Check offchan abort event * @wiphy: the wiphy - * @chandef: chandef for the current channel - * @event: type of event - * - * This function is called when a Channel Availability Check (CAC) is finished, - * started or aborted by a offchannel dedicated chain. * - * Note that this acquires the wiphy lock. + * This function is called by the driver when a Channel Availability Check + * (CAC) is aborted by a offchannel dedicated chain. */ -void cfg80211_offchan_cac_event(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef, - enum nl80211_radar_event event); +void cfg80211_offchan_cac_abort(struct wiphy *wiphy); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying -- cgit v1.2.3 From c4f5b30dda01f2f6979a9681142de454991182ee Mon Sep 17 00:00:00 2001 From: Biju Das Date: Fri, 12 Nov 2021 18:44:10 +0000 Subject: reset: Add of_reset_control_get_optional_exclusive() Add optional variant of of_reset_control_get_exclusive(). If the requested reset is not specified in the device tree, this function returns NULL instead of an error. Suggested-by: Philipp Zabel Signed-off-by: Biju Das Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211112184413.4391-2-biju.das.jz@bp.renesas.com Signed-off-by: Philipp Zabel --- include/linux/reset.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index db0e6115a2f6..8a21b5756c3e 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -454,6 +454,26 @@ static inline struct reset_control *of_reset_control_get_exclusive( return __of_reset_control_get(node, id, 0, false, false, true); } +/** + * of_reset_control_get_optional_exclusive - Lookup and obtain an optional exclusive + * reference to a reset controller. + * @node: device to be reset by the controller + * @id: reset line name + * + * Optional variant of of_reset_control_get_exclusive(). If the requested reset + * is not specified in the device tree, this function returns NULL instead of + * an error. + * + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * Use of id names is optional. + */ +static inline struct reset_control *of_reset_control_get_optional_exclusive( + struct device_node *node, const char *id) +{ + return __of_reset_control_get(node, id, 0, false, true, true); +} + /** * of_reset_control_get_shared - Lookup and obtain a shared reference * to a reset controller. -- cgit v1.2.3 From b5d8cf0af167f3ab9f4cfe44918cde01e20a1222 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 18 Nov 2021 12:34:07 -0800 Subject: net/af_iucv: Use struct_group() to zero struct iucv_sock region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memset(), avoid intentionally writing across neighboring fields. Add struct_group() to mark the region of struct iucv_sock that gets initialized to zero. Avoid the future warning: In function 'fortify_memset_chk', inlined from 'iucv_sock_alloc' at net/iucv/af_iucv.c:476:2: ./include/linux/fortify-string.h:199:4: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 199 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Acked-by: Karsten Graul Link: https://lore.kernel.org/lkml/19ff61a0-0cda-6000-ce56-dc6b367c00d6@linux.ibm.com/ Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/net/iucv/af_iucv.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index ff06246dbbb9..df85d19fbf84 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -112,10 +112,12 @@ enum iucv_tx_notify { struct iucv_sock { struct sock sk; - char src_user_id[8]; - char src_name[8]; - char dst_user_id[8]; - char dst_name[8]; + struct_group(init, + char src_user_id[8]; + char src_name[8]; + char dst_user_id[8]; + char dst_name[8]; + ); struct list_head accept_q; spinlock_t accept_q_lock; struct sock *parent; -- cgit v1.2.3 From fcb116bc43c8c37c052530ead79872f8b2615711 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 18 Nov 2021 14:23:21 -0600 Subject: signal: Replace force_fatal_sig with force_exit_sig when in doubt Recently to prevent issues with SECCOMP_RET_KILL and similar signals being changed before they are delivered SA_IMMUTABLE was added. Unfortunately this broke debuggers[1][2] which reasonably expect to be able to trap synchronous SIGTRAP and SIGSEGV even when the target process is not configured to handle those signals. Add force_exit_sig and use it instead of force_fatal_sig where historically the code has directly called do_exit. This has the implementation benefits of going through the signal exit path (including generating core dumps) without the danger of allowing userspace to ignore or change these signals. This avoids userspace regressions as older kernels exited with do_exit which debuggers also can not intercept. In the future is should be possible to improve the quality of implementation of the kernel by changing some of these force_exit_sig calls to force_fatal_sig. That can be done where it matters on a case-by-case basis with careful analysis. Reported-by: Kyle Huey Reported-by: kernel test robot [1] https://lkml.kernel.org/r/CAP045AoMY4xf8aC_4QU_-j7obuEPYgTcnQQP3Yxk=2X90jtpjw@mail.gmail.com [2] https://lkml.kernel.org/r/20211117150258.GB5403@xsang-OptiPlex-9020 Fixes: 00b06da29cf9 ("signal: Add SA_IMMUTABLE to ensure forced siganls do not get changed") Fixes: a3616a3c0272 ("signal/m68k: Use force_sigsegv(SIGSEGV) in fpsp040_die") Fixes: 83a1f27ad773 ("signal/powerpc: On swapcontext failure force SIGSEGV") Fixes: 9bc508cf0791 ("signal/s390: Use force_sigsegv in default_trap_handler") Fixes: 086ec444f866 ("signal/sparc32: In setup_rt_frame and setup_fram use force_fatal_sig") Fixes: c317d306d550 ("signal/sparc32: Exit with a fatal signal when try_to_clear_window_buffer fails") Fixes: 695dd0d634df ("signal/x86: In emulate_vsyscall force a signal instead of calling do_exit") Fixes: 1fbd60df8a85 ("signal/vm86_32: Properly send SIGSEGV when the vm86 state cannot be saved.") Fixes: 941edc5bf174 ("exit/syscall_user_dispatch: Send ordinary signals on failure") Link: https://lkml.kernel.org/r/871r3dqfv8.fsf_-_@email.froward.int.ebiederm.org Reviewed-by: Kees Cook Tested-by: Kees Cook Tested-by: Kyle Huey Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 23505394ef70..33a50642cf41 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -352,6 +352,7 @@ extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int); extern void force_fatal_sig(int); +extern void force_exit_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); -- cgit v1.2.3 From 98a1ca29768aca6e06e93f2bc4439f01ba439ac4 Mon Sep 17 00:00:00 2001 From: Lukas Middendorf Date: Sun, 18 Apr 2021 01:12:03 +0100 Subject: media: media dvb_frontend: add suspend and resume callbacks to dvb_frontend_ops While dvb_tuner_ops already has dedicated suspend and resume callbacks, dvb_frontend_ops currently does not have them. Add those callbacks and use them for suspend and resume. If they are not set, the old behavior of calling sleep or init is used. This allows dvb_frontend drivers to handle resume differently from init, and suspend differently from sleep. No change is required for drivers not needing this functionality. Link: https://lore.kernel.org/linux-media/20210418001204.7453-2-kernel@tuxforce.de Cc: Lukas Middendorf , Antti Palosaari , Mauro Carvalho Chehab , Luis Chamberlain Signed-off-by: Lukas Middendorf Signed-off-by: Mauro Carvalho Chehab --- include/media/dvb_frontend.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h index 0d76fa4551b3..e7c44870f20d 100644 --- a/include/media/dvb_frontend.h +++ b/include/media/dvb_frontend.h @@ -364,6 +364,10 @@ struct dvb_frontend_internal_info { * allocated by the driver. * @init: callback function used to initialize the tuner device. * @sleep: callback function used to put the tuner to sleep. + * @suspend: callback function used to inform that the Kernel will + * suspend. + * @resume: callback function used to inform that the Kernel is + * resuming from suspend. * @write: callback function used by some demod legacy drivers to * allow other drivers to write data into their registers. * Should not be used on new drivers. @@ -443,6 +447,8 @@ struct dvb_frontend_ops { int (*init)(struct dvb_frontend* fe); int (*sleep)(struct dvb_frontend* fe); + int (*suspend)(struct dvb_frontend *fe); + int (*resume)(struct dvb_frontend *fe); int (*write)(struct dvb_frontend* fe, const u8 buf[], int len); @@ -755,7 +761,8 @@ void dvb_frontend_detach(struct dvb_frontend *fe); * &dvb_frontend_ops.tuner_ops.suspend\(\) is available, it calls it. Otherwise, * it will call &dvb_frontend_ops.tuner_ops.sleep\(\), if available. * - * It will also call &dvb_frontend_ops.sleep\(\) to put the demod to suspend. + * It will also call &dvb_frontend_ops.suspend\(\) to put the demod to suspend, + * if available. Otherwise it will call &dvb_frontend_ops.sleep\(\). * * The drivers should also call dvb_frontend_suspend\(\) as part of their * handler for the &device_driver.suspend\(\). @@ -769,7 +776,9 @@ int dvb_frontend_suspend(struct dvb_frontend *fe); * * This function resumes the usual operation of the tuner after resume. * - * In order to resume the frontend, it calls the demod &dvb_frontend_ops.init\(\). + * In order to resume the frontend, it calls the demod + * &dvb_frontend_ops.resume\(\) if available. Otherwise it calls demod + * &dvb_frontend_ops.init\(\). * * If &dvb_frontend_ops.tuner_ops.resume\(\) is available, It, it calls it. * Otherwise,t will call &dvb_frontend_ops.tuner_ops.init\(\), if available. -- cgit v1.2.3 From d68f50e6ad0ee7080b0244a15f2dd3d46040632a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 18 Oct 2021 14:54:55 +0200 Subject: dt-bindings: clock: samsung: add IDs for some core clocks Add IDs for some core clocks referenced during the boot process. Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20211018125456.8292-1-m.szyprowski@samsung.com Signed-off-by: Sylwester Nawrocki --- include/dt-bindings/clock/exynos4.h | 4 +++- include/dt-bindings/clock/exynos5250.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos4.h b/include/dt-bindings/clock/exynos4.h index 88ec3968b90a..acbfbab875ec 100644 --- a/include/dt-bindings/clock/exynos4.h +++ b/include/dt-bindings/clock/exynos4.h @@ -209,6 +209,7 @@ #define CLK_ACLK400_MCUISP 395 /* Exynos4x12 only */ #define CLK_MOUT_HDMI 396 #define CLK_MOUT_MIXER 397 +#define CLK_MOUT_VPLLSRC 398 /* gate clocks - ppmu */ #define CLK_PPMULEFT 400 @@ -236,9 +237,10 @@ #define CLK_DIV_C2C 458 /* Exynos4x12 only */ #define CLK_DIV_GDL 459 #define CLK_DIV_GDR 460 +#define CLK_DIV_CORE2 461 /* must be greater than maximal clock id */ -#define CLK_NR_CLKS 461 +#define CLK_NR_CLKS 462 /* Exynos4x12 ISP clocks */ #define CLK_ISP_FIMC_ISP 1 diff --git a/include/dt-bindings/clock/exynos5250.h b/include/dt-bindings/clock/exynos5250.h index e259cc01f22f..4680da7357d3 100644 --- a/include/dt-bindings/clock/exynos5250.h +++ b/include/dt-bindings/clock/exynos5250.h @@ -19,6 +19,7 @@ #define CLK_FOUT_EPLL 7 #define CLK_FOUT_VPLL 8 #define CLK_ARM_CLK 9 +#define CLK_DIV_ARM2 10 /* gate for special clocks (sclk) */ #define CLK_SCLK_CAM_BAYER 128 @@ -174,8 +175,9 @@ #define CLK_MOUT_ACLK300_DISP1_SUB 1027 #define CLK_MOUT_APLL 1028 #define CLK_MOUT_MPLL 1029 +#define CLK_MOUT_VPLLSRC 1030 /* must be greater than maximal clock id */ -#define CLK_NR_CLKS 1030 +#define CLK_NR_CLKS 1031 #endif /* _DT_BINDINGS_CLOCK_EXYNOS_5250_H */ -- cgit v1.2.3 From d8466f73010faf71effb21228ae1cbf577dab130 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 16 Oct 2021 14:22:27 +0100 Subject: mtd: rawnand: Export nand_read_page_hwecc_oob_first() Move the function nand_read_page_hwecc_oob_first() (previously nand_davinci_read_page_hwecc_oob_first()) to nand_base.c, and export it as a GPL symbol, so that it can be used by more modules. Cc: # v5.2 Fixes: a0ac778eb82c ("mtd: rawnand: ingenic: Add support for the JZ4740") Signed-off-by: Paul Cercueil Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211016132228.40254-4-paul@crapouillou.net --- include/linux/mtd/rawnand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index b2f9dd3cbd69..5b88cd51fadb 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1539,6 +1539,8 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, bool force_8bit, bool check_only); int nand_write_data_op(struct nand_chip *chip, const void *buf, unsigned int len, bool force_8bit); +int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf, + int oob_required, int page); /* Scan and identify a NAND device */ int nand_scan_with_ids(struct nand_chip *chip, unsigned int max_chips, -- cgit v1.2.3 From 8d22679dc89a6d9e1d41b2514902e3f7ef51547a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Nov 2021 18:23:55 -0800 Subject: ipv6: ip6_skb_dst_mtu() cleanups Use const attribute where we can, and cache skb_dst() Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20211119022355.2985984-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/ip6_route.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5efd0b71dc67..ca2d6b60e1ec 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -263,19 +263,19 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); -static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) +static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) { - unsigned int mtu; - - struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; + const struct dst_entry *dst = skb_dst(skb); + unsigned int mtu; if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { - mtu = READ_ONCE(skb_dst(skb)->dev->mtu); - mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); - } else - mtu = dst_mtu(skb_dst(skb)); - + mtu = READ_ONCE(dst->dev->mtu); + mtu -= lwtunnel_headroom(dst->lwtstate, mtu); + } else { + mtu = dst_mtu(dst); + } return mtu; } -- cgit v1.2.3 From adeef3e32146a8d2a73c399dc6f5d76a449131b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Nov 2021 06:21:51 -0800 Subject: net: constify netdev->dev_addr Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. We converted all users to make modifications via appropriate helpers, make netdev->dev_addr const. The update helpers need to upcast from the buffer to struct netdev_hw_addr. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4f4a299e92de..2462195784a9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2117,7 +2117,7 @@ struct net_device { * Cache lines mostly used on receive path (including eth_type_trans()) */ /* Interface address info used in eth_type_trans() */ - unsigned char *dev_addr; + const unsigned char *dev_addr; struct netdev_rx_queue *_rx; unsigned int num_rx_queues; @@ -4268,10 +4268,13 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ +void dev_addr_mod(struct net_device *dev, unsigned int offset, + const void *addr, size_t len); + static inline void __dev_addr_set(struct net_device *dev, const void *addr, size_t len) { - memcpy(dev->dev_addr, addr, len); + dev_addr_mod(dev, 0, addr, len); } static inline void dev_addr_set(struct net_device *dev, const u8 *addr) @@ -4279,13 +4282,6 @@ static inline void dev_addr_set(struct net_device *dev, const u8 *addr) __dev_addr_set(dev, addr, dev->addr_len); } -static inline void -dev_addr_mod(struct net_device *dev, unsigned int offset, - const void *addr, size_t len) -{ - memcpy(&dev->dev_addr[offset], addr, len); -} - int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, -- cgit v1.2.3 From d07b26f5bbea9ade34dfd6abea7b3ca056c03cd1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Nov 2021 06:21:53 -0800 Subject: dev_addr: add a modification check netdev->dev_addr should only be modified via helpers, but someone may be casting off the const. Add a runtime check to catch abuses. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2462195784a9..cb7f2661d187 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1942,6 +1942,8 @@ enum netdev_ml_priv_type { * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. * + * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2268,6 +2270,8 @@ struct net_device { /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; + + u8 dev_addr_shadow[MAX_ADDR_LEN]; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -4288,6 +4292,7 @@ int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); +void dev_addr_check(struct net_device *dev); /* Functions used for unicast addresses handling */ int dev_uc_add(struct net_device *dev, const unsigned char *addr); -- cgit v1.2.3 From 4f47d5d507d6f211ebceac76a5f0b83c2eae154b Mon Sep 17 00:00:00 2001 From: Poorva Sonparote Date: Fri, 19 Nov 2021 12:41:34 -0800 Subject: ipv4: Exposing __ip_sock_set_tos() in ip.h Making the static function __ip_sock_set_tos() from net/ipv4/ip_sockglue.c accessible by declaring it in include/net/ip.h The reason for doing this is to use this function to set IP_TOS value in mptcp_setsockopt() without the lock. Signed-off-by: Poorva Sonparote Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/ip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 7d1088888c10..81e23a102a0d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -783,5 +783,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); +void __ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */ -- cgit v1.2.3 From 85b6d24646e4125c591639841169baa98a2da503 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 19 Nov 2021 16:43:21 -0800 Subject: shm: extend forced shm destroy to support objects from several IPC nses Currently, the exit_shm() function not designed to work properly when task->sysvshm.shm_clist holds shm objects from different IPC namespaces. This is a real pain when sysctl kernel.shm_rmid_forced = 1, because it leads to use-after-free (reproducer exists). This is an attempt to fix the problem by extending exit_shm mechanism to handle shm's destroy from several IPC ns'es. To achieve that we do several things: 1. add a namespace (non-refcounted) pointer to the struct shmid_kernel 2. during new shm object creation (newseg()/shmget syscall) we initialize this pointer by current task IPC ns 3. exit_shm() fully reworked such that it traverses over all shp's in task->sysvshm.shm_clist and gets IPC namespace not from current task as it was before but from shp's object itself, then call shm_destroy(shp, ns). Note: We need to be really careful here, because as it was said before (1), our pointer to IPC ns non-refcnt'ed. To be on the safe side we using special helper get_ipc_ns_not_zero() which allows to get IPC ns refcounter only if IPC ns not in the "state of destruction". Q/A Q: Why can we access shp->ns memory using non-refcounted pointer? A: Because shp object lifetime is always shorther than IPC namespace lifetime, so, if we get shp object from the task->sysvshm.shm_clist while holding task_lock(task) nobody can steal our namespace. Q: Does this patch change semantics of unshare/setns/clone syscalls? A: No. It's just fixes non-covered case when process may leave IPC namespace without getting task->sysvshm.shm_clist list cleaned up. Link: https://lkml.kernel.org/r/67bb03e5-f79c-1815-e2bf-949c67047418@colorfullife.com Link: https://lkml.kernel.org/r/20211109151501.4921-1-manfred@colorfullife.com Fixes: ab602f79915 ("shm: make exit_shm work proportional to task activity") Co-developed-by: Manfred Spraul Signed-off-by: Manfred Spraul Signed-off-by: Alexander Mikhalitsyn Cc: "Eric W. Biederman" Cc: Davidlohr Bueso Cc: Greg KH Cc: Andrei Vagin Cc: Pavel Tikhomirov Cc: Vasily Averin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 15 +++++++++++++++ include/linux/sched/task.h | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 05e22770af51..b75395ec8d52 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + if (ns) { + if (refcount_inc_not_zero(&ns->ns.count)) + return ns; + } + + return NULL; +} + extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, @@ -147,6 +157,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + return ns; +} + static inline void put_ipc_ns(struct ipc_namespace *ns) { } diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ba88a6987400..058d7f371e25 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -158,7 +158,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. + * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), -- cgit v1.2.3 From afe041c2d0febd83698b8b0164e6b3b1dfae0b66 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Fri, 19 Nov 2021 16:43:40 -0800 Subject: hugetlb: fix hugetlb cgroup refcounting during mremap When hugetlb_vm_op_open() is called during copy_vma(), we may take the reference to resv_map->css. Later, when clearing the reservation pointer of old_vma after transferring it to new_vma, we forget to drop the reference to resv_map->css. This leads to a reference leak of css. Fixes this by adding a check to drop reservation css reference in clear_vma_resv_huge_pages() Link: https://lkml.kernel.org/r/20211113154412.91134-1-minhquangbui99@gmail.com Fixes: 550a7d60bd5e35 ("mm, hugepages: add mremap() support for hugepage backed vma") Signed-off-by: Bui Quang Minh Reviewed-by: Mike Kravetz Reviewed-by: Mina Almasry Cc: Miaohe Lin Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb_cgroup.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index c137396129db..ba025ae27882 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -128,6 +128,13 @@ static inline void resv_map_dup_hugetlb_cgroup_uncharge_info( css_get(resv_map->css); } +static inline void resv_map_put_hugetlb_cgroup_uncharge_info( + struct resv_map *resv_map) +{ + if (resv_map->css) + css_put(resv_map->css); +} + extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr); extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, @@ -211,6 +218,11 @@ static inline void resv_map_dup_hugetlb_cgroup_uncharge_info( { } +static inline void resv_map_put_hugetlb_cgroup_uncharge_info( + struct resv_map *resv_map) +{ +} + static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr) { -- cgit v1.2.3 From f65b8132092699e4f672111836f3f51c00c354f2 Mon Sep 17 00:00:00 2001 From: Elyes HAOUAS Date: Thu, 28 Oct 2021 23:05:17 +0200 Subject: include/linux/efi.h: Remove unneeded whitespaces before tabs Signed-off-by: Elyes HAOUAS Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index dbd39b20e034..de36fb547602 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -570,8 +570,8 @@ extern struct efi { unsigned long flags; } efi; -#define EFI_RT_SUPPORTED_GET_TIME 0x0001 -#define EFI_RT_SUPPORTED_SET_TIME 0x0002 +#define EFI_RT_SUPPORTED_GET_TIME 0x0001 +#define EFI_RT_SUPPORTED_SET_TIME 0x0002 #define EFI_RT_SUPPORTED_GET_WAKEUP_TIME 0x0004 #define EFI_RT_SUPPORTED_SET_WAKEUP_TIME 0x0008 #define EFI_RT_SUPPORTED_GET_VARIABLE 0x0010 @@ -838,7 +838,7 @@ extern int efi_status_to_err(efi_status_t status); #define EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS 0x0000000000000020 #define EFI_VARIABLE_APPEND_WRITE 0x0000000000000040 -#define EFI_VARIABLE_MASK (EFI_VARIABLE_NON_VOLATILE | \ +#define EFI_VARIABLE_MASK (EFI_VARIABLE_NON_VOLATILE | \ EFI_VARIABLE_BOOTSERVICE_ACCESS | \ EFI_VARIABLE_RUNTIME_ACCESS | \ EFI_VARIABLE_HARDWARE_ERROR_RECORD | \ -- cgit v1.2.3 From 3218910fd5858842a1dd98ce92b602f0878f8210 Mon Sep 17 00:00:00 2001 From: Adrian Larumbe Date: Mon, 1 Nov 2021 18:08:24 +0000 Subject: dmaengine: Add core function and capability check for DMA_MEMCPY_SG This is the old DMA_SG interface that was removed in commit c678fa66341c ("dmaengine: remove DMA_SG as it is dead code in kernel"). It has been renamed to DMA_MEMCPY_SG to better match the MEMSET and MEMSET_SG naming convention. It should only be used for mem2mem copies, either main system memory or CPU-addressable device memory (like video memory on a PCI graphics card). Bringing back this interface was prompted by the need to use the Xilinx CDMA device for mem2mem SG transfers. Signed-off-by: Adrian Larumbe Link: https://lore.kernel.org/r/20211101180825.241048-3-adrianml@alumnos.upm.es Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 9000f3ffce8b..554a86665de9 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -50,6 +50,7 @@ enum dma_status { */ enum dma_transaction_type { DMA_MEMCPY, + DMA_MEMCPY_SG, DMA_XOR, DMA_PQ, DMA_XOR_VAL, @@ -891,6 +892,11 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_memcpy_sg)( + struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); @@ -1051,6 +1057,20 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy( len, flags); } +static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy_sg( + struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags) +{ + if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy_sg) + return NULL; + + return chan->device->device_prep_dma_memcpy_sg(chan, dst_sg, dst_nents, + src_sg, src_nents, + flags); +} + static inline bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan, enum dma_desc_metadata_mode mode) { -- cgit v1.2.3 From b88dbe38dca82425a273d126785866af39ba0770 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Tue, 16 Nov 2021 14:38:35 +0000 Subject: media: uapi: Add VP9 stateless decoder controls Add the VP9 stateless decoder controls plus the documentation that goes with it. Signed-off-by: Boris Brezillon Co-developed-by: Ezequiel Garcia Signed-off-by: Ezequiel Garcia Signed-off-by: Adrian Ratiu Signed-off-by: Andrzej Pietrasiewicz Co-developed-by: Daniel Almeida Signed-off-by: Daniel Almeida Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-ctrls.h | 4 + include/uapi/linux/v4l2-controls.h | 284 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/videodev2.h | 6 + 3 files changed, 294 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index 575b59fbac77..b3ce438f1329 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -50,6 +50,8 @@ struct video_device; * @p_h264_decode_params: Pointer to a struct v4l2_ctrl_h264_decode_params. * @p_h264_pred_weights: Pointer to a struct v4l2_ctrl_h264_pred_weights. * @p_vp8_frame: Pointer to a VP8 frame params structure. + * @p_vp9_compressed_hdr_probs: Pointer to a VP9 frame compressed header probs structure. + * @p_vp9_frame: Pointer to a VP9 frame params structure. * @p_hevc_sps: Pointer to an HEVC sequence parameter set structure. * @p_hevc_pps: Pointer to an HEVC picture parameter set structure. * @p_hevc_slice_params: Pointer to an HEVC slice parameters structure. @@ -80,6 +82,8 @@ union v4l2_ctrl_ptr { struct v4l2_ctrl_hevc_sps *p_hevc_sps; struct v4l2_ctrl_hevc_pps *p_hevc_pps; struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params; + struct v4l2_ctrl_vp9_compressed_hdr *p_vp9_compressed_hdr_probs; + struct v4l2_ctrl_vp9_frame *p_vp9_frame; struct v4l2_ctrl_hdr10_cll_info *p_hdr10_cll; struct v4l2_ctrl_hdr10_mastering_display *p_hdr10_mastering; struct v4l2_area *p_area; diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index c9eea93a43a9..c8e0f84d204d 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -2018,6 +2018,290 @@ struct v4l2_ctrl_hdr10_mastering_display { __u32 min_display_mastering_luminance; }; +/* Stateless VP9 controls */ + +#define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED 0x1 +#define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE 0x2 + +/** + * struct v4l2_vp9_loop_filter - VP9 loop filter parameters + * + * @ref_deltas: contains the adjustment needed for the filter level based on the + * chosen reference frame. If this syntax element is not present in the bitstream, + * users should pass its last value. + * @mode_deltas: contains the adjustment needed for the filter level based on the + * chosen mode. If this syntax element is not present in the bitstream, users should + * pass its last value. + * @level: indicates the loop filter strength. + * @sharpness: indicates the sharpness level. + * @flags: combination of V4L2_VP9_LOOP_FILTER_FLAG_{} flags. + * @reserved: padding field. Should be zeroed by applications. + * + * This structure contains all loop filter related parameters. See sections + * '7.2.8 Loop filter semantics' of the VP9 specification for more details. + */ +struct v4l2_vp9_loop_filter { + __s8 ref_deltas[4]; + __s8 mode_deltas[2]; + __u8 level; + __u8 sharpness; + __u8 flags; + __u8 reserved[7]; +}; + +/** + * struct v4l2_vp9_quantization - VP9 quantization parameters + * + * @base_q_idx: indicates the base frame qindex. + * @delta_q_y_dc: indicates the Y DC quantizer relative to base_q_idx. + * @delta_q_uv_dc: indicates the UV DC quantizer relative to base_q_idx. + * @delta_q_uv_ac: indicates the UV AC quantizer relative to base_q_idx. + * @reserved: padding field. Should be zeroed by applications. + * + * Encodes the quantization parameters. See section '7.2.9 Quantization params + * syntax' of the VP9 specification for more details. + */ +struct v4l2_vp9_quantization { + __u8 base_q_idx; + __s8 delta_q_y_dc; + __s8 delta_q_uv_dc; + __s8 delta_q_uv_ac; + __u8 reserved[4]; +}; + +#define V4L2_VP9_SEGMENTATION_FLAG_ENABLED 0x01 +#define V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP 0x02 +#define V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE 0x04 +#define V4L2_VP9_SEGMENTATION_FLAG_UPDATE_DATA 0x08 +#define V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE 0x10 + +#define V4L2_VP9_SEG_LVL_ALT_Q 0 +#define V4L2_VP9_SEG_LVL_ALT_L 1 +#define V4L2_VP9_SEG_LVL_REF_FRAME 2 +#define V4L2_VP9_SEG_LVL_SKIP 3 +#define V4L2_VP9_SEG_LVL_MAX 4 + +#define V4L2_VP9_SEGMENT_FEATURE_ENABLED(id) (1 << (id)) +#define V4L2_VP9_SEGMENT_FEATURE_ENABLED_MASK 0xf + +/** + * struct v4l2_vp9_segmentation - VP9 segmentation parameters + * + * @feature_data: data attached to each feature. Data entry is only valid if + * the feature is enabled. The array shall be indexed with segment number as + * the first dimension (0..7) and one of V4L2_VP9_SEG_{} as the second dimension. + * @feature_enabled: bitmask defining which features are enabled in each segment. + * The value for each segment is a combination of V4L2_VP9_SEGMENT_FEATURE_ENABLED(id) + * values where id is one of V4L2_VP9_SEG_LVL_{}. + * @tree_probs: specifies the probability values to be used when decoding a + * Segment-ID. See '5.15. Segmentation map' section of the VP9 specification + * for more details. + * @pred_probs: specifies the probability values to be used when decoding a + * Predicted-Segment-ID. See '6.4.14. Get segment id syntax' section of :ref:`vp9` + * for more details. + * @flags: combination of V4L2_VP9_SEGMENTATION_FLAG_{} flags. + * @reserved: padding field. Should be zeroed by applications. + * + * Encodes the quantization parameters. See section '7.2.10 Segmentation params syntax' of + * the VP9 specification for more details. + */ +struct v4l2_vp9_segmentation { + __s16 feature_data[8][4]; + __u8 feature_enabled[8]; + __u8 tree_probs[7]; + __u8 pred_probs[3]; + __u8 flags; + __u8 reserved[5]; +}; + +#define V4L2_VP9_FRAME_FLAG_KEY_FRAME 0x001 +#define V4L2_VP9_FRAME_FLAG_SHOW_FRAME 0x002 +#define V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT 0x004 +#define V4L2_VP9_FRAME_FLAG_INTRA_ONLY 0x008 +#define V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV 0x010 +#define V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX 0x020 +#define V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE 0x040 +#define V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING 0x080 +#define V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING 0x100 +#define V4L2_VP9_FRAME_FLAG_COLOR_RANGE_FULL_SWING 0x200 + +#define V4L2_VP9_SIGN_BIAS_LAST 0x1 +#define V4L2_VP9_SIGN_BIAS_GOLDEN 0x2 +#define V4L2_VP9_SIGN_BIAS_ALT 0x4 + +#define V4L2_VP9_RESET_FRAME_CTX_NONE 0 +#define V4L2_VP9_RESET_FRAME_CTX_SPEC 1 +#define V4L2_VP9_RESET_FRAME_CTX_ALL 2 + +#define V4L2_VP9_INTERP_FILTER_EIGHTTAP 0 +#define V4L2_VP9_INTERP_FILTER_EIGHTTAP_SMOOTH 1 +#define V4L2_VP9_INTERP_FILTER_EIGHTTAP_SHARP 2 +#define V4L2_VP9_INTERP_FILTER_BILINEAR 3 +#define V4L2_VP9_INTERP_FILTER_SWITCHABLE 4 + +#define V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE 0 +#define V4L2_VP9_REFERENCE_MODE_COMPOUND_REFERENCE 1 +#define V4L2_VP9_REFERENCE_MODE_SELECT 2 + +#define V4L2_VP9_PROFILE_MAX 3 + +#define V4L2_CID_STATELESS_VP9_FRAME (V4L2_CID_CODEC_STATELESS_BASE + 300) +/** + * struct v4l2_ctrl_vp9_frame - VP9 frame decoding control + * + * @lf: loop filter parameters. See &v4l2_vp9_loop_filter for more details. + * @quant: quantization parameters. See &v4l2_vp9_quantization for more details. + * @seg: segmentation parameters. See &v4l2_vp9_segmentation for more details. + * @flags: combination of V4L2_VP9_FRAME_FLAG_{} flags. + * @compressed_header_size: compressed header size in bytes. + * @uncompressed_header_size: uncompressed header size in bytes. + * @frame_width_minus_1: add 1 to it and you'll get the frame width expressed in pixels. + * @frame_height_minus_1: add 1 to it and you'll get the frame height expressed in pixels. + * @render_width_minus_1: add 1 to it and you'll get the expected render width expressed in + * pixels. This is not used during the decoding process but might be used by HW scalers + * to prepare a frame that's ready for scanout. + * @render_height_minus_1: add 1 to it and you'll get the expected render height expressed in + * pixels. This is not used during the decoding process but might be used by HW scalers + * to prepare a frame that's ready for scanout. + * @last_frame_ts: "last" reference buffer timestamp. + * The timestamp refers to the timestamp field in struct v4l2_buffer. + * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64. + * @golden_frame_ts: "golden" reference buffer timestamp. + * The timestamp refers to the timestamp field in struct v4l2_buffer. + * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64. + * @alt_frame_ts: "alt" reference buffer timestamp. + * The timestamp refers to the timestamp field in struct v4l2_buffer. + * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64. + * @ref_frame_sign_bias: a bitfield specifying whether the sign bias is set for a given + * reference frame. Either of V4L2_VP9_SIGN_BIAS_{}. + * @reset_frame_context: specifies whether the frame context should be reset to default values. + * Either of V4L2_VP9_RESET_FRAME_CTX_{}. + * @frame_context_idx: frame context that should be used/updated. + * @profile: VP9 profile. Can be 0, 1, 2 or 3. + * @bit_depth: bits per components. Can be 8, 10 or 12. Note that not all profiles support + * 10 and/or 12 bits depths. + * @interpolation_filter: specifies the filter selection used for performing inter prediction. + * Set to one of V4L2_VP9_INTERP_FILTER_{}. + * @tile_cols_log2: specifies the base 2 logarithm of the width of each tile (where the width + * is measured in units of 8x8 blocks). Shall be less than or equal to 6. + * @tile_rows_log2: specifies the base 2 logarithm of the height of each tile (where the height + * is measured in units of 8x8 blocks). + * @reference_mode: specifies the type of inter prediction to be used. + * Set to one of V4L2_VP9_REFERENCE_MODE_{}. + * @reserved: padding field. Should be zeroed by applications. + */ +struct v4l2_ctrl_vp9_frame { + struct v4l2_vp9_loop_filter lf; + struct v4l2_vp9_quantization quant; + struct v4l2_vp9_segmentation seg; + __u32 flags; + __u16 compressed_header_size; + __u16 uncompressed_header_size; + __u16 frame_width_minus_1; + __u16 frame_height_minus_1; + __u16 render_width_minus_1; + __u16 render_height_minus_1; + __u64 last_frame_ts; + __u64 golden_frame_ts; + __u64 alt_frame_ts; + __u8 ref_frame_sign_bias; + __u8 reset_frame_context; + __u8 frame_context_idx; + __u8 profile; + __u8 bit_depth; + __u8 interpolation_filter; + __u8 tile_cols_log2; + __u8 tile_rows_log2; + __u8 reference_mode; + __u8 reserved[7]; +}; + +#define V4L2_VP9_NUM_FRAME_CTX 4 + +/** + * struct v4l2_vp9_mv_probs - VP9 Motion vector probability updates + * @joint: motion vector joint probability updates. + * @sign: motion vector sign probability updates. + * @classes: motion vector class probability updates. + * @class0_bit: motion vector class0 bit probability updates. + * @bits: motion vector bits probability updates. + * @class0_fr: motion vector class0 fractional bit probability updates. + * @fr: motion vector fractional bit probability updates. + * @class0_hp: motion vector class0 high precision fractional bit probability updates. + * @hp: motion vector high precision fractional bit probability updates. + * + * This structure contains new values of motion vector probabilities. + * A value of zero in an array element means there is no update of the relevant probability. + * See `struct v4l2_vp9_prob_updates` for details. + */ +struct v4l2_vp9_mv_probs { + __u8 joint[3]; + __u8 sign[2]; + __u8 classes[2][10]; + __u8 class0_bit[2]; + __u8 bits[2][10]; + __u8 class0_fr[2][2][3]; + __u8 fr[2][3]; + __u8 class0_hp[2]; + __u8 hp[2]; +}; + +#define V4L2_CID_STATELESS_VP9_COMPRESSED_HDR (V4L2_CID_CODEC_STATELESS_BASE + 301) + +#define V4L2_VP9_TX_MODE_ONLY_4X4 0 +#define V4L2_VP9_TX_MODE_ALLOW_8X8 1 +#define V4L2_VP9_TX_MODE_ALLOW_16X16 2 +#define V4L2_VP9_TX_MODE_ALLOW_32X32 3 +#define V4L2_VP9_TX_MODE_SELECT 4 + +/** + * struct v4l2_ctrl_vp9_compressed_hdr - VP9 probability updates control + * @tx_mode: specifies the TX mode. Set to one of V4L2_VP9_TX_MODE_{}. + * @tx8: TX 8x8 probability updates. + * @tx16: TX 16x16 probability updates. + * @tx32: TX 32x32 probability updates. + * @coef: coefficient probability updates. + * @skip: skip probability updates. + * @inter_mode: inter mode probability updates. + * @interp_filter: interpolation filter probability updates. + * @is_inter: is inter-block probability updates. + * @comp_mode: compound prediction mode probability updates. + * @single_ref: single ref probability updates. + * @comp_ref: compound ref probability updates. + * @y_mode: Y prediction mode probability updates. + * @uv_mode: UV prediction mode probability updates. + * @partition: partition probability updates. + * @mv: motion vector probability updates. + * + * This structure holds the probabilities update as parsed in the compressed + * header (Spec 6.3). These values represent the value of probability update after + * being translated with inv_map_table[] (see 6.3.5). A value of zero in an array element + * means that there is no update of the relevant probability. + * + * This control is optional and needs to be used when dealing with the hardware which is + * not capable of parsing the compressed header itself. Only drivers which need it will + * implement it. + */ +struct v4l2_ctrl_vp9_compressed_hdr { + __u8 tx_mode; + __u8 tx8[2][1]; + __u8 tx16[2][2]; + __u8 tx32[2][3]; + __u8 coef[4][2][2][6][6][3]; + __u8 skip[3]; + __u8 inter_mode[7][3]; + __u8 interp_filter[4][2]; + __u8 is_inter[4]; + __u8 comp_mode[5]; + __u8 single_ref[5][2]; + __u8 comp_ref[5]; + __u8 y_mode[4][9]; + __u8 uv_mode[10][9]; + __u8 partition[16][3]; + + struct v4l2_vp9_mv_probs mv; +}; + /* MPEG-compression definitions kept for backwards compatibility */ #ifndef __KERNEL__ #define V4L2_CTRL_CLASS_MPEG V4L2_CTRL_CLASS_CODEC diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index f118fe7a9f58..df8b9c486ba1 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -703,6 +703,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_VP8 v4l2_fourcc('V', 'P', '8', '0') /* VP8 */ #define V4L2_PIX_FMT_VP8_FRAME v4l2_fourcc('V', 'P', '8', 'F') /* VP8 parsed frame */ #define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') /* VP9 */ +#define V4L2_PIX_FMT_VP9_FRAME v4l2_fourcc('V', 'P', '9', 'F') /* VP9 parsed frame */ #define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') /* HEVC aka H.265 */ #define V4L2_PIX_FMT_FWHT v4l2_fourcc('F', 'W', 'H', 'T') /* Fast Walsh Hadamard Transform (vicodec) */ #define V4L2_PIX_FMT_FWHT_STATELESS v4l2_fourcc('S', 'F', 'W', 'H') /* Stateless FWHT (vicodec) */ @@ -1759,6 +1760,8 @@ struct v4l2_ext_control { struct v4l2_ctrl_mpeg2_sequence __user *p_mpeg2_sequence; struct v4l2_ctrl_mpeg2_picture __user *p_mpeg2_picture; struct v4l2_ctrl_mpeg2_quantisation __user *p_mpeg2_quantisation; + struct v4l2_ctrl_vp9_compressed_hdr __user *p_vp9_compressed_hdr_probs; + struct v4l2_ctrl_vp9_frame __user *p_vp9_frame; void __user *ptr; }; } __attribute__ ((packed)); @@ -1823,6 +1826,9 @@ enum v4l2_ctrl_type { V4L2_CTRL_TYPE_MPEG2_QUANTISATION = 0x0250, V4L2_CTRL_TYPE_MPEG2_SEQUENCE = 0x0251, V4L2_CTRL_TYPE_MPEG2_PICTURE = 0x0252, + + V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR = 0x0260, + V4L2_CTRL_TYPE_VP9_FRAME = 0x0261, }; /* Used in the VIDIOC_QUERYCTRL ioctl for querying controls */ -- cgit v1.2.3 From 3e3b1fb0e5d95c0cd7278717185ca3fb00f5d771 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Tue, 16 Nov 2021 14:38:36 +0000 Subject: media: Add VP9 v4l2 library Provide code common to vp9 drivers in one central location. Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-vp9.h | 233 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 include/media/v4l2-vp9.h (limited to 'include') diff --git a/include/media/v4l2-vp9.h b/include/media/v4l2-vp9.h new file mode 100644 index 000000000000..05478ad6d4ab --- /dev/null +++ b/include/media/v4l2-vp9.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Helper functions for vp9 codecs. + * + * Copyright (c) 2021 Collabora, Ltd. + * + * Author: Andrzej Pietrasiewicz + */ + +#ifndef _MEDIA_V4L2_VP9_H +#define _MEDIA_V4L2_VP9_H + +#include + +/** + * struct v4l2_vp9_frame_mv_context - motion vector-related probabilities + * + * @joint: motion vector joint probabilities. + * @sign: motion vector sign probabilities. + * @classes: motion vector class probabilities. + * @class0_bit: motion vector class0 bit probabilities. + * @bits: motion vector bits probabilities. + * @class0_fr: motion vector class0 fractional bit probabilities. + * @fr: motion vector fractional bit probabilities. + * @class0_hp: motion vector class0 high precision fractional bit probabilities. + * @hp: motion vector high precision fractional bit probabilities. + * + * A member of v4l2_vp9_frame_context. + */ +struct v4l2_vp9_frame_mv_context { + u8 joint[3]; + u8 sign[2]; + u8 classes[2][10]; + u8 class0_bit[2]; + u8 bits[2][10]; + u8 class0_fr[2][2][3]; + u8 fr[2][3]; + u8 class0_hp[2]; + u8 hp[2]; +}; + +/** + * struct v4l2_vp9_frame_context - frame probabilities, including motion-vector related + * + * @tx8: TX 8x8 probabilities. + * @tx16: TX 16x16 probabilities. + * @tx32: TX 32x32 probabilities. + * @coef: coefficient probabilities. + * @skip: skip probabilities. + * @inter_mode: inter mode probabilities. + * @interp_filter: interpolation filter probabilities. + * @is_inter: is inter-block probabilities. + * @comp_mode: compound prediction mode probabilities. + * @single_ref: single ref probabilities. + * @comp_ref: compound ref probabilities. + * @y_mode: Y prediction mode probabilities. + * @uv_mode: UV prediction mode probabilities. + * @partition: partition probabilities. + * @mv: motion vector probabilities. + * + * Drivers which need to keep track of frame context(s) can use this struct. + * The members correspond to probability tables, which are specified only implicitly in the + * vp9 spec. Section 10.5 "Default probability tables" contains all the types of involved + * tables, i.e. the actual tables are of the same kind, and when they are reset (which is + * mandated by the spec sometimes) they are overwritten with values from the default tables. + */ +struct v4l2_vp9_frame_context { + u8 tx8[2][1]; + u8 tx16[2][2]; + u8 tx32[2][3]; + u8 coef[4][2][2][6][6][3]; + u8 skip[3]; + u8 inter_mode[7][3]; + u8 interp_filter[4][2]; + u8 is_inter[4]; + u8 comp_mode[5]; + u8 single_ref[5][2]; + u8 comp_ref[5]; + u8 y_mode[4][9]; + u8 uv_mode[10][9]; + u8 partition[16][3]; + + struct v4l2_vp9_frame_mv_context mv; +}; + +/** + * struct v4l2_vp9_frame_symbol_counts - pointers to arrays of symbol counts + * + * @partition: partition counts. + * @skip: skip counts. + * @intra_inter: is inter-block counts. + * @tx32p: TX32 counts. + * @tx16p: TX16 counts. + * @tx8p: TX8 counts. + * @y_mode: Y prediction mode counts. + * @uv_mode: UV prediction mode counts. + * @comp: compound prediction mode counts. + * @comp_ref: compound ref counts. + * @single_ref: single ref counts. + * @mv_mode: inter mode counts. + * @filter: interpolation filter counts. + * @mv_joint: motion vector joint counts. + * @sign: motion vector sign counts. + * @classes: motion vector class counts. + * @class0: motion vector class0 bit counts. + * @bits: motion vector bits counts. + * @class0_fp: motion vector class0 fractional bit counts. + * @fp: motion vector fractional bit counts. + * @class0_hp: motion vector class0 high precision fractional bit counts. + * @hp: motion vector high precision fractional bit counts. + * @coeff: coefficient counts. + * @eob: eob counts + * + * The fields correspond to what is specified in section 8.3 "Clear counts process" of the spec. + * Different pieces of hardware can report the counts in different order, so we cannot rely on + * simply overlaying a struct on a relevant block of memory. Instead we provide pointers to + * arrays or array of pointers to arrays in case of coeff, or array of pointers for eob. + */ +struct v4l2_vp9_frame_symbol_counts { + u32 (*partition)[16][4]; + u32 (*skip)[3][2]; + u32 (*intra_inter)[4][2]; + u32 (*tx32p)[2][4]; + u32 (*tx16p)[2][4]; + u32 (*tx8p)[2][2]; + u32 (*y_mode)[4][10]; + u32 (*uv_mode)[10][10]; + u32 (*comp)[5][2]; + u32 (*comp_ref)[5][2]; + u32 (*single_ref)[5][2][2]; + u32 (*mv_mode)[7][4]; + u32 (*filter)[4][3]; + u32 (*mv_joint)[4]; + u32 (*sign)[2][2]; + u32 (*classes)[2][11]; + u32 (*class0)[2][2]; + u32 (*bits)[2][10][2]; + u32 (*class0_fp)[2][2][4]; + u32 (*fp)[2][4]; + u32 (*class0_hp)[2][2]; + u32 (*hp)[2][2]; + u32 (*coeff[4][2][2][6][6])[3]; + u32 *eob[4][2][2][6][6][2]; +}; + +extern const u8 v4l2_vp9_kf_y_mode_prob[10][10][9]; /* Section 10.4 of the spec */ +extern const u8 v4l2_vp9_kf_partition_probs[16][3]; /* Section 10.4 of the spec */ +extern const u8 v4l2_vp9_kf_uv_mode_prob[10][9]; /* Section 10.4 of the spec */ +extern const struct v4l2_vp9_frame_context v4l2_vp9_default_probs; /* Section 10.5 of the spec */ + +/** + * v4l2_vp9_fw_update_probs() - Perform forward update of vp9 probabilities + * + * @probs: current probabilities values + * @deltas: delta values from compressed header + * @dec_params: vp9 frame decoding parameters + * + * This function performs forward updates of probabilities for the vp9 boolean decoder. + * The frame header can contain a directive to update the probabilities (deltas), if so, then + * the deltas are provided in the header, too. The userspace parses those and passes the said + * deltas struct to the kernel. + */ +void v4l2_vp9_fw_update_probs(struct v4l2_vp9_frame_context *probs, + const struct v4l2_ctrl_vp9_compressed_hdr *deltas, + const struct v4l2_ctrl_vp9_frame *dec_params); + +/** + * v4l2_vp9_reset_frame_ctx() - Reset appropriate frame context + * + * @dec_params: vp9 frame decoding parameters + * @frame_context: array of the 4 frame contexts + * + * This function resets appropriate frame contexts, based on what's in dec_params. + * + * Returns the frame context index after the update, which might be reset to zero if + * mandated by the spec. + */ +u8 v4l2_vp9_reset_frame_ctx(const struct v4l2_ctrl_vp9_frame *dec_params, + struct v4l2_vp9_frame_context *frame_context); + +/** + * v4l2_vp9_adapt_coef_probs() - Perform backward update of vp9 coefficients probabilities + * + * @probs: current probabilities values + * @counts: values of symbol counts after the current frame has been decoded + * @use_128: flag to request that 128 is used as update factor if true, otherwise 112 is used + * @frame_is_intra: flag indicating that FrameIsIntra is true + * + * This function performs backward updates of coefficients probabilities for the vp9 boolean + * decoder. After a frame has been decoded the counts of how many times a given symbol has + * occurred are known and are used to update the probability of each symbol. + */ +void v4l2_vp9_adapt_coef_probs(struct v4l2_vp9_frame_context *probs, + struct v4l2_vp9_frame_symbol_counts *counts, + bool use_128, + bool frame_is_intra); + +/** + * v4l2_vp9_adapt_noncoef_probs() - Perform backward update of vp9 non-coefficients probabilities + * + * @probs: current probabilities values + * @counts: values of symbol counts after the current frame has been decoded + * @reference_mode: specifies the type of inter prediction to be used. See + * &v4l2_vp9_reference_mode for more details + * @interpolation_filter: specifies the filter selection used for performing inter prediction. + * See &v4l2_vp9_interpolation_filter for more details + * @tx_mode: specifies the TX mode. See &v4l2_vp9_tx_mode for more details + * @flags: combination of V4L2_VP9_FRAME_FLAG_* flags + * + * This function performs backward updates of non-coefficients probabilities for the vp9 boolean + * decoder. After a frame has been decoded the counts of how many times a given symbol has + * occurred are known and are used to update the probability of each symbol. + */ +void v4l2_vp9_adapt_noncoef_probs(struct v4l2_vp9_frame_context *probs, + struct v4l2_vp9_frame_symbol_counts *counts, + u8 reference_mode, u8 interpolation_filter, u8 tx_mode, + u32 flags); + +/** + * v4l2_vp9_seg_feat_enabled() - Check if a segmentation feature is enabled + * + * @feature_enabled: array of 8-bit flags (for all segments) + * @feature: id of the feature to check + * @segid: id of the segment to look up + * + * This function returns true if a given feature is active in a given segment. + */ +bool +v4l2_vp9_seg_feat_enabled(const u8 *feature_enabled, + unsigned int feature, + unsigned int segid); + +#endif /* _MEDIA_V4L2_VP9_H */ -- cgit v1.2.3 From 16e0c2474fcfaa8a6c61abfaa981994c4564a628 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Mon, 22 Nov 2021 01:27:36 +0200 Subject: dt-bindings: clock: Add bindings for Exynos850 CMU_APM CMU_APM generates clocks for APM IP-core (Active Power Management). In particular it generates RTC clocks, which are needed to enable rtc-s3c driver on Exynos850 SoC. Add clock indices and binding documentation for CMU_APM. Signed-off-by: Sam Protsenko Signed-off-by: Sylwester Nawrocki Reviewed-by: Krzysztof Kozlowski Reviewed-by: Rob Herring Acked-by: Chanwoo Choi Link: https://lore.kernel.org/r/20211121232741.6967-2-semen.protsenko@linaro.org --- include/dt-bindings/clock/exynos850.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos850.h b/include/dt-bindings/clock/exynos850.h index 8999184f94a2..df3978b58304 100644 --- a/include/dt-bindings/clock/exynos850.h +++ b/include/dt-bindings/clock/exynos850.h @@ -55,7 +55,34 @@ #define CLK_GOUT_PERI_BUS 43 #define CLK_GOUT_PERI_UART 44 #define CLK_GOUT_PERI_IP 45 -#define TOP_NR_CLK 46 +#define CLK_MOUT_CLKCMU_APM_BUS 46 +#define CLK_DOUT_CLKCMU_APM_BUS 47 +#define CLK_GOUT_CLKCMU_APM_BUS 48 +#define TOP_NR_CLK 49 + +/* CMU_APM */ +#define CLK_RCO_I3C_PMIC 1 +#define OSCCLK_RCO_APM 2 +#define CLK_RCO_APM__ALV 3 +#define CLK_DLL_DCO 4 +#define CLK_MOUT_APM_BUS_USER 5 +#define CLK_MOUT_RCO_APM_I3C_USER 6 +#define CLK_MOUT_RCO_APM_USER 7 +#define CLK_MOUT_DLL_USER 8 +#define CLK_MOUT_CLKCMU_CHUB_BUS 9 +#define CLK_MOUT_APM_BUS 10 +#define CLK_MOUT_APM_I3C 11 +#define CLK_DOUT_CLKCMU_CHUB_BUS 12 +#define CLK_DOUT_APM_BUS 13 +#define CLK_DOUT_APM_I3C 14 +#define CLK_GOUT_CLKCMU_CMGP_BUS 15 +#define CLK_GOUT_CLKCMU_CHUB_BUS 16 +#define CLK_GOUT_RTC_PCLK 17 +#define CLK_GOUT_TOP_RTC_PCLK 18 +#define CLK_GOUT_I3C_PCLK 19 +#define CLK_GOUT_I3C_SCLK 20 +#define CLK_GOUT_SPEEDY_PCLK 21 +#define APM_NR_CLK 22 /* CMU_HSI */ #define CLK_MOUT_HSI_BUS_USER 1 -- cgit v1.2.3 From c2afeb79fdb24de4cea73f12d2ede84a5a68fa08 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Mon, 22 Nov 2021 01:27:38 +0200 Subject: dt-bindings: clock: Add bindings for Exynos850 CMU_CMGP CMU_CMGP generates USI and ADC clocks for BLK_ALIVE. In particular USI clocks are needed for HSI2C_3 and HSI2C_4 instances. Add clock indices and bindings documentation for CMU_CMGP domain. Signed-off-by: Sam Protsenko Signed-off-by: Sylwester Nawrocki Reviewed-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Link: https://lore.kernel.org/r/20211121232741.6967-4-semen.protsenko@linaro.org --- include/dt-bindings/clock/exynos850.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos850.h b/include/dt-bindings/clock/exynos850.h index df3978b58304..8aa5e82af0d3 100644 --- a/include/dt-bindings/clock/exynos850.h +++ b/include/dt-bindings/clock/exynos850.h @@ -84,6 +84,23 @@ #define CLK_GOUT_SPEEDY_PCLK 21 #define APM_NR_CLK 22 +/* CMU_CMGP */ +#define CLK_RCO_CMGP 1 +#define CLK_MOUT_CMGP_ADC 2 +#define CLK_MOUT_CMGP_USI0 3 +#define CLK_MOUT_CMGP_USI1 4 +#define CLK_DOUT_CMGP_ADC 5 +#define CLK_DOUT_CMGP_USI0 6 +#define CLK_DOUT_CMGP_USI1 7 +#define CLK_GOUT_CMGP_ADC_S0_PCLK 8 +#define CLK_GOUT_CMGP_ADC_S1_PCLK 9 +#define CLK_GOUT_CMGP_GPIO_PCLK 10 +#define CLK_GOUT_CMGP_USI0_IPCLK 11 +#define CLK_GOUT_CMGP_USI0_PCLK 12 +#define CLK_GOUT_CMGP_USI1_IPCLK 13 +#define CLK_GOUT_CMGP_USI1_PCLK 14 +#define CMGP_NR_CLK 15 + /* CMU_HSI */ #define CLK_MOUT_HSI_BUS_USER 1 #define CLK_MOUT_HSI_MMC_CARD_USER 2 -- cgit v1.2.3 From 448f413a8bdc727d25d9a786ccbdb974fb85d973 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 18 Nov 2021 20:12:40 +0800 Subject: ethtool: add support to set/get tx copybreak buf size via ethtool Add support for ethtool to set/get tx copybreak buf size. Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index a2223b685451..7bc4b8def12c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -231,6 +231,7 @@ enum tunable_id { ETHTOOL_RX_COPYBREAK, ETHTOOL_TX_COPYBREAK, ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ + ETHTOOL_TX_COPYBREAK_BUF_SIZE, /* * Add your fresh new tunable attribute above and remember to update * tunable_strings[] in net/ethtool/common.c -- cgit v1.2.3 From 0b70c256eba8448b072d25c95ee65e59da8970de Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 18 Nov 2021 20:12:42 +0800 Subject: ethtool: add support to set/get rx buf len via ethtool Add support to set rx buf len via ethtool -G parameter and get rx buf len via ethtool -g parameter. Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- include/linux/ethtool.h | 18 ++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 845a0ffc16ee..0b252b82988b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -67,6 +67,22 @@ enum { ETH_RSS_HASH_FUNCS_COUNT }; +/** + * struct kernel_ethtool_ringparam - RX/TX ring configuration + * @rx_buf_len: Current length of buffers on the rx ring. + */ +struct kernel_ethtool_ringparam { + u32 rx_buf_len; +}; + +/** + * enum ethtool_supported_ring_param - indicator caps for setting ring params + * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len + */ +enum ethtool_supported_ring_param { + ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), +}; + #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) #define __ETH_RSS_HASH(name) __ETH_RSS_HASH_BIT(ETH_RSS_HASH_##name##_BIT) @@ -432,6 +448,7 @@ struct ethtool_module_power_mode_params { * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. * @supported_coalesce_params: supported types of interrupt coalescing. + * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not * implemented, the @driver and @bus_info fields will be filled in @@ -613,6 +630,7 @@ struct ethtool_module_power_mode_params { struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 supported_coalesce_params; + u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 999777d32dcf..cca6e474a085 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -329,6 +329,7 @@ enum { ETHTOOL_A_RINGS_RX_MINI, /* u32 */ ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ ETHTOOL_A_RINGS_TX, /* u32 */ + ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, -- cgit v1.2.3 From 7462494408cd3de8b0bc1e79670bf213288501d0 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 18 Nov 2021 20:12:43 +0800 Subject: ethtool: extend ringparam setting/getting API with rx_buf_len Add two new parameters kernel_ringparam and extack for .get_ringparam and .set_ringparam to extend more ring params through netlink. Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- include/linux/ethtool.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 0b252b82988b..a26f37a27167 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -656,9 +656,13 @@ struct ethtool_ops { struct kernel_ethtool_coalesce *, struct netlink_ext_ack *); void (*get_ringparam)(struct net_device *, - struct ethtool_ringparam *); + struct ethtool_ringparam *, + struct kernel_ethtool_ringparam *, + struct netlink_ext_ack *); int (*set_ringparam)(struct net_device *, - struct ethtool_ringparam *); + struct ethtool_ringparam *, + struct kernel_ethtool_ringparam *, + struct netlink_ext_ack *); void (*get_pause_stats)(struct net_device *dev, struct ethtool_pause_stats *pause_stats); void (*get_pauseparam)(struct net_device *, -- cgit v1.2.3 From 4b66d2161b8125b6caa6971815e85631cf3cf36f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Nov 2021 07:43:31 -0800 Subject: net: annotate accesses to dev->gso_max_size dev->gso_max_size is written under RTNL protection, or when the device is not yet visible, but is read locklessly. Add the READ_ONCE()/WRITE_ONCE() pairs, and use netif_set_gso_max_size() where we can to better document what is going on. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb7f2661d187..14eeb58ed197 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4731,7 +4731,8 @@ static inline bool netif_needs_gso(struct sk_buff *skb, static inline void netif_set_gso_max_size(struct net_device *dev, unsigned int size) { - dev->gso_max_size = size; + /* dev->gso_max_size is read locklessly from sk_setup_caps() */ + WRITE_ONCE(dev->gso_max_size, size); } static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, -- cgit v1.2.3 From 6d872df3e3b91532b142de9044e5b4984017a55f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Nov 2021 07:43:32 -0800 Subject: net: annotate accesses to dev->gso_max_segs dev->gso_max_segs is written under RTNL protection, or when the device is not yet visible, but is read locklessly. Add netif_set_gso_max_segs() helper. Add the READ_ONCE()/WRITE_ONCE() pairs, and use netif_set_gso_max_segs() where we can to better document what is going on. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 14eeb58ed197..df049864661d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4735,6 +4735,13 @@ static inline void netif_set_gso_max_size(struct net_device *dev, WRITE_ONCE(dev->gso_max_size, size); } +static inline void netif_set_gso_max_segs(struct net_device *dev, + unsigned int segs) +{ + /* dev->gso_max_segs is read locklessly from sk_setup_caps() */ + WRITE_ONCE(dev->gso_max_segs, segs); +} + static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, int pulled_hlen, u16 mac_offset, int mac_len) -- cgit v1.2.3 From 291dcae39bc482f7edc91a50d97027327e0cf5f9 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 19 Nov 2021 10:58:09 -0500 Subject: net: phylink: Add helpers for c22 registers without MDIO Some devices expose memory-mapped c22-compliant PHYs. Because these devices do not have an MDIO bus, we cannot use the existing helpers. Refactor the existing helpers to allow supplying the values for c22 registers directly, instead of using MDIO to access them. Only get_state and set_advertisement are converted, since they contain the most complex logic. Because set_advertisement is never actually used outside phylink_mii_c22_pcs_config, move the MDIO-writing part into that function. Because some modes do not need the advertisement register set at all, we use -EINVAL for this purpose. Additionally, a new function phylink_pcs_enable_an is provided to determine whether to enable autonegotiation. Signed-off-by: Sean Anderson Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/phylink.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 3563820a1765..01224235df0f 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -527,11 +527,12 @@ void phylink_set_port_modes(unsigned long *bits); void phylink_set_10g_modes(unsigned long *mask); void phylink_helper_basex_speed(struct phylink_link_state *state); +void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, + u16 bmsr, u16 lpa); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, struct phylink_link_state *state); -int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, - phy_interface_t interface, - const unsigned long *advertising); +int phylink_mii_c22_pcs_encode_advertisement(phy_interface_t interface, + const unsigned long *advertising); int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode, phy_interface_t interface, const unsigned long *advertising); -- cgit v1.2.3 From c4804670026b93f4ebddda30af89fd737bf93931 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Sat, 20 Nov 2021 21:51:54 +0530 Subject: net: wwan: common debugfs base dir for wwan device This patch set brings in a common debugfs base directory i.e. /sys/kernel/debugfs/wwan/ in WWAN Subsystem for a WWAN device instance. So that it avoids driver polluting debugfs root with unrelated directories & possible name collusion. Having a common debugfs base directory for WWAN drivers eases user to match control devices with debugfs entries. WWAN Subsystem creates dentry (/sys/kernel/debugfs/wwan) on module load & removes dentry on module unload. When driver registers a new wwan device, dentry (wwanX) is created for WWAN device instance & on driver unregister dentry is removed. New API is introduced to return the wwan device instance dentry so that driver can create debugfs entries under it. Signed-off-by: M Chetan Kumar Reviewed-by: Loic Poulain Signed-off-by: David S. Miller --- include/linux/wwan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 9fac819f92e3..1646aa3e6779 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -171,4 +171,6 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops, void wwan_unregister_ops(struct device *parent); +struct dentry *wwan_get_debugfs_dir(struct device *parent); + #endif /* __WWAN_H */ -- cgit v1.2.3 From cb902b332f9545635911063b671927defa5866bf Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 22 Nov 2021 15:24:56 +0100 Subject: sections: global data can be in .bss When checking an address is located in a global data section also check for the .bss section as global variables initialized to 0 can be in there (-fzero-initialized-in-bss). This was found when looking at ensure_safe_net_sysctl which was failing to detect non-init sysctl pointing to a global data section when the data was in the .bss section. Signed-off-by: Antoine Tenart Acked-by: Steven Rostedt (VMware) Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/asm-generic/sections.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 1dfadb2e878d..76a0f16e56cf 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -130,18 +130,24 @@ static inline bool init_section_intersects(void *virt, size_t size) /** * is_kernel_core_data - checks if the pointer address is located in the - * .data section + * .data or .bss section * * @addr: address to check * - * Returns: true if the address is located in .data, false otherwise. + * Returns: true if the address is located in .data or .bss, false otherwise. * Note: On some archs it may return true for core RODATA, and false * for others. But will always be true for core RW data. */ static inline bool is_kernel_core_data(unsigned long addr) { - return addr >= (unsigned long)_sdata && - addr < (unsigned long)_edata; + if (addr >= (unsigned long)_sdata && addr < (unsigned long)_edata) + return true; + + if (addr >= (unsigned long)__bss_start && + addr < (unsigned long)__bss_stop) + return true; + + return false; } /** -- cgit v1.2.3 From 6deb3fb22da19e21ca5372fb34fb868bfceaf74c Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 9 Nov 2021 20:56:56 +0800 Subject: clk: imx8mp: Remove IPG_AUDIO_ROOT from imx8mp-clock.h Since the commit b24e288d5063 ("clk: imx: Remove the audio ipg clock from imx8mp") removes the non-existing IPG_AUDIO_ROOT from the clk-imx8mp.c, and this definition is not used by anywhere, let us removed it in the imx8mp-clock.h as well. Signed-off-by: Hui Wang Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20211109125657.63485-1-hui.wang@canonical.com Signed-off-by: Abel Vesa --- include/dt-bindings/clock/imx8mp-clock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/imx8mp-clock.h b/include/dt-bindings/clock/imx8mp-clock.h index 43927a1b9e94..235c7a00d379 100644 --- a/include/dt-bindings/clock/imx8mp-clock.h +++ b/include/dt-bindings/clock/imx8mp-clock.h @@ -117,7 +117,6 @@ #define IMX8MP_CLK_AUDIO_AHB 108 #define IMX8MP_CLK_MIPI_DSI_ESC_RX 109 #define IMX8MP_CLK_IPG_ROOT 110 -#define IMX8MP_CLK_IPG_AUDIO_ROOT 111 #define IMX8MP_CLK_DRAM_ALT 112 #define IMX8MP_CLK_DRAM_APB 113 #define IMX8MP_CLK_VPU_G1 114 -- cgit v1.2.3 From fba84957e2e2e201cf4e352efe0c7cac0fbb5d5d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 20 Nov 2021 16:31:48 -0800 Subject: skbuff: Move conditional preprocessor directives out of struct sk_buff In preparation for using the struct_group() macro in struct sk_buff, move the conditional preprocessor directives out of the region of struct sk_buff that will be enclosed by struct_group(). While GCC and Clang are happy with conditional preprocessor directives here, sparse is not, even under -Wno-directive-within-macro[1], as would be seen under a C=1 build: net/core/filter.c: note: in included file (through include/linux/netlink.h, include/linux/sock_diag.h): ./include/linux/skbuff.h:820:1: warning: directive in macro's argument list ./include/linux/skbuff.h:822:1: warning: directive in macro's argument list ./include/linux/skbuff.h:846:1: warning: directive in macro's argument list ./include/linux/skbuff.h:848:1: warning: directive in macro's argument list Additionally remove empty macro argument definitions and usage. "objdump -d" shows no object code differences. [1] https://www.spinics.net/lists/linux-sparse/msg10857.html Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/linux/skbuff.h | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 059b6266dcd7..c7889afe0d0d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -795,7 +795,7 @@ struct sk_buff { #else #define CLONED_MASK 1 #endif -#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) +#define CLONED_OFFSET offsetof(struct sk_buff, __cloned_offset) /* private: */ __u8 __cloned_offset[0]; @@ -818,18 +818,10 @@ struct sk_buff { __u32 headers_start[0]; /* public: */ -/* if you move pkt_type around you also must adapt those constants */ -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) - /* private: */ __u8 __pkt_type_offset[0]; /* public: */ - __u8 pkt_type:3; + __u8 pkt_type:3; /* see PKT_TYPE_MAX */ __u8 ignore_df:1; __u8 nf_trace:1; __u8 ip_summed:2; @@ -845,16 +837,10 @@ struct sk_buff { __u8 encap_hdr_csum:1; __u8 csum_valid:1; -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_VLAN_PRESENT_BIT 7 -#else -#define PKT_VLAN_PRESENT_BIT 0 -#endif -#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) /* private: */ __u8 __pkt_vlan_present_offset[0]; /* public: */ - __u8 vlan_present:1; + __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 csum_not_inet:1; @@ -953,6 +939,22 @@ struct sk_buff { #endif }; +/* if you move pkt_type around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 +#endif +#define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) + +/* if you move pkt_vlan_present around you also must adapt these constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_VLAN_PRESENT_BIT 7 +#else +#define PKT_VLAN_PRESENT_BIT 0 +#endif +#define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) + #ifdef __KERNEL__ /* * Handling routines are only of interest to the kernel -- cgit v1.2.3 From 03f61041c17914355dde7261be9ccdc821ddd454 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 20 Nov 2021 16:31:49 -0800 Subject: skbuff: Switch structure bounds to struct_group() In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Replace the existing empty member position markers "headers_start" and "headers_end" with a struct_group(). This will allow memcpy() and sizeof() to more easily reason about sizes, and improve readability. "pahole" shows no size nor member offset changes to struct sk_buff. "objdump -d" shows no object code changes (outside of WARNs affected by source line number changes). Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Reviewed-by: Jason A. Donenfeld # drivers/net/wireguard/* Link: https://lore.kernel.org/lkml/20210728035006.GD35706@embeddedor Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c7889afe0d0d..eba256af64a5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -811,12 +811,10 @@ struct sk_buff { __u8 active_extensions; #endif - /* fields enclosed in headers_start/headers_end are copied + /* Fields enclosed in headers group are copied * using a single memcpy() in __copy_skb_header() */ - /* private: */ - __u32 headers_start[0]; - /* public: */ + struct_group(headers, /* private: */ __u8 __pkt_type_offset[0]; @@ -921,9 +919,7 @@ struct sk_buff { u64 kcov_handle; #endif - /* private: */ - __u32 headers_end[0]; - /* public: */ + ); /* end headers group */ /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; -- cgit v1.2.3 From 28c916ade1bd4205958f74bb817fd3a05dbb7afc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 18 Nov 2021 16:30:14 +0100 Subject: ASoC: soc-acpi: Set mach->id field on comp_ids matches Commit dac7cbd55dca ("ASoC: Intel: soc-acpi-byt: shrink tables using compatible IDs") and commit 959ae8215a9e ("ASoC: Intel: soc-acpi-cht: shrink tables using compatible IDs") simplified the match tables in soc-acpi-intel-byt-match.c and soc-acpi-intel-cht-match.c by merging identical entries using the new .comp_ids snd_soc_acpi_mach field to point a single entry to multiple ACPI HIDs and clearing the previously unique per entry .id field. But various machine drivers from sound/soc/intel/boards rely on mach->id in one or more ways, e.g. some drivers contain the following snippets: adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); pkg_found = snd_soc_acpi_find_package_from_hid(mach->id, ... if (!strncmp(snd_soc_cards[i].codec_id, mach->id, 8)) { ... All of which are broken by the match table shrinking. Make the snd_soc_acpi_mach.id field non const (the storage for the tables already is non const) and on a comps_ids match copy the matching HID to the id field to fix this. Fixes: dac7cbd55dca ("ASoC: Intel: soc-acpi-byt: shrink tables using compatible IDs") Fixes: 959ae8215a9e ("ASoC: Intel: soc-acpi-cht: shrink tables using compatible IDs") Suggested-by: Pierre-Louis Bossart Cc: Pierre-Louis Bossart Cc: Brent Lu Signed-off-by: Hans de Goede Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211118153014.349222-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 31f4c4f9aeea..ac0893df9c76 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -147,7 +147,7 @@ struct snd_soc_acpi_link_adr { */ /* Descriptor for SST ASoC machine driver */ struct snd_soc_acpi_mach { - const u8 id[ACPI_ID_LEN]; + u8 id[ACPI_ID_LEN]; const struct snd_soc_acpi_codecs *comp_ids; const u32 link_mask; const struct snd_soc_acpi_link_adr *links; -- cgit v1.2.3 From 8837cbbf854246f5f4d565f21e6baa945d37aded Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 22 Nov 2021 17:15:12 +0200 Subject: net: ipv6: add fib6_nh_release_dsts stub We need a way to release a fib6_nh's per-cpu dsts when replacing nexthops otherwise we can end up with stale per-cpu dsts which hold net device references, so add a new IPv6 stub called fib6_nh_release_dsts. It must be used after an RCU grace period, so no new dsts can be created through a group's nexthop entry. Similar to fib6_nh_release it shouldn't be used if fib6_nh_init has failed so it doesn't need a dummy stub when IPv6 is not enabled. Fixes: 7bf4796dd099 ("nexthops: add support for replace") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 1 + include/net/ipv6_stubs.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c412dde4d67d..83b8070d1cc9 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -485,6 +485,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); +void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index afbce90c4480..45e0339be6fa 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -47,6 +47,7 @@ struct ipv6_stub { struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh); void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify); void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, -- cgit v1.2.3 From 3e9fdc6b73ca862e72ea8a563638cecdc11d26e2 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Thu, 21 Oct 2021 13:24:54 +0000 Subject: dt-bindings: interconnect: Add Qualcomm MSM8996 DT bindings Add bindings for interconnects on Qualcomm MSM8996. Signed-off-by: Yassine Oudjana Reviewed-by: Rob Herring Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov #db820c Link: https://lore.kernel.org/r/20211021132329.234942-4-y.oudjana@protonmail.com Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,msm8996.h | 163 ++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,msm8996.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,msm8996.h b/include/dt-bindings/interconnect/qcom,msm8996.h new file mode 100644 index 000000000000..a0b7c0ec7bed --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,msm8996.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * Qualcomm MSM8996 interconnect IDs + * + * Copyright (c) 2021 Yassine Oudjana + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8996_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8996_H + +/* A0NOC */ +#define MASTER_PCIE_0 0 +#define MASTER_PCIE_1 1 +#define MASTER_PCIE_2 2 + +/* A1NOC */ +#define MASTER_CNOC_A1NOC 0 +#define MASTER_CRYPTO_CORE0 1 +#define MASTER_PNOC_A1NOC 2 + +/* A2NOC */ +#define MASTER_USB3 0 +#define MASTER_IPA 1 +#define MASTER_UFS 2 + +/* BIMC */ +#define MASTER_AMPSS_M0 0 +#define MASTER_GRAPHICS_3D 1 +#define MASTER_MNOC_BIMC 2 +#define MASTER_SNOC_BIMC 3 +#define SLAVE_EBI_CH0 4 +#define SLAVE_HMSS_L3 5 +#define SLAVE_BIMC_SNOC_0 6 +#define SLAVE_BIMC_SNOC_1 7 + +/* CNOC */ +#define MASTER_SNOC_CNOC 0 +#define MASTER_QDSS_DAP 1 +#define SLAVE_CNOC_A1NOC 2 +#define SLAVE_CLK_CTL 3 +#define SLAVE_TCSR 4 +#define SLAVE_TLMM 5 +#define SLAVE_CRYPTO_0_CFG 6 +#define SLAVE_MPM 7 +#define SLAVE_PIMEM_CFG 8 +#define SLAVE_IMEM_CFG 9 +#define SLAVE_MESSAGE_RAM 10 +#define SLAVE_BIMC_CFG 11 +#define SLAVE_PMIC_ARB 12 +#define SLAVE_PRNG 13 +#define SLAVE_DCC_CFG 14 +#define SLAVE_RBCPR_MX 15 +#define SLAVE_QDSS_CFG 16 +#define SLAVE_RBCPR_CX 17 +#define SLAVE_QDSS_RBCPR_APU 18 +#define SLAVE_CNOC_MNOC_CFG 19 +#define SLAVE_SNOC_CFG 20 +#define SLAVE_SNOC_MPU_CFG 21 +#define SLAVE_EBI1_PHY_CFG 22 +#define SLAVE_A0NOC_CFG 23 +#define SLAVE_PCIE_1_CFG 24 +#define SLAVE_PCIE_2_CFG 25 +#define SLAVE_PCIE_0_CFG 26 +#define SLAVE_PCIE20_AHB2PHY 27 +#define SLAVE_A0NOC_MPU_CFG 28 +#define SLAVE_UFS_CFG 29 +#define SLAVE_A1NOC_CFG 30 +#define SLAVE_A1NOC_MPU_CFG 31 +#define SLAVE_A2NOC_CFG 32 +#define SLAVE_A2NOC_MPU_CFG 33 +#define SLAVE_SSC_CFG 34 +#define SLAVE_A0NOC_SMMU_CFG 35 +#define SLAVE_A1NOC_SMMU_CFG 36 +#define SLAVE_A2NOC_SMMU_CFG 37 +#define SLAVE_LPASS_SMMU_CFG 38 +#define SLAVE_CNOC_MNOC_MMSS_CFG 39 + +/* MNOC */ +#define MASTER_CNOC_MNOC_CFG 0 +#define MASTER_CPP 1 +#define MASTER_JPEG 2 +#define MASTER_MDP_PORT0 3 +#define MASTER_MDP_PORT1 4 +#define MASTER_ROTATOR 5 +#define MASTER_VIDEO_P0 6 +#define MASTER_VFE 7 +#define MASTER_SNOC_VMEM 8 +#define MASTER_VIDEO_P0_OCMEM 9 +#define MASTER_CNOC_MNOC_MMSS_CFG 10 +#define SLAVE_MNOC_BIMC 11 +#define SLAVE_VMEM 12 +#define SLAVE_SERVICE_MNOC 13 +#define SLAVE_MMAGIC_CFG 14 +#define SLAVE_CPR_CFG 15 +#define SLAVE_MISC_CFG 16 +#define SLAVE_VENUS_THROTTLE_CFG 17 +#define SLAVE_VENUS_CFG 18 +#define SLAVE_VMEM_CFG 19 +#define SLAVE_DSA_CFG 20 +#define SLAVE_MMSS_CLK_CFG 21 +#define SLAVE_DSA_MPU_CFG 22 +#define SLAVE_MNOC_MPU_CFG 23 +#define SLAVE_DISPLAY_CFG 24 +#define SLAVE_DISPLAY_THROTTLE_CFG 25 +#define SLAVE_CAMERA_CFG 26 +#define SLAVE_CAMERA_THROTTLE_CFG 27 +#define SLAVE_GRAPHICS_3D_CFG 28 +#define SLAVE_SMMU_MDP_CFG 29 +#define SLAVE_SMMU_ROT_CFG 30 +#define SLAVE_SMMU_VENUS_CFG 31 +#define SLAVE_SMMU_CPP_CFG 32 +#define SLAVE_SMMU_JPEG_CFG 33 +#define SLAVE_SMMU_VFE_CFG 34 + +/* PNOC */ +#define MASTER_SNOC_PNOC 0 +#define MASTER_SDCC_1 1 +#define MASTER_SDCC_2 2 +#define MASTER_SDCC_4 3 +#define MASTER_USB_HS 4 +#define MASTER_BLSP_1 5 +#define MASTER_BLSP_2 6 +#define MASTER_TSIF 7 +#define SLAVE_PNOC_A1NOC 8 +#define SLAVE_USB_HS 9 +#define SLAVE_SDCC_2 10 +#define SLAVE_SDCC_4 11 +#define SLAVE_TSIF 12 +#define SLAVE_BLSP_2 13 +#define SLAVE_SDCC_1 14 +#define SLAVE_BLSP_1 15 +#define SLAVE_PDM 16 +#define SLAVE_AHB2PHY 17 + +/* SNOC */ +#define MASTER_HMSS 0 +#define MASTER_QDSS_BAM 1 +#define MASTER_SNOC_CFG 2 +#define MASTER_BIMC_SNOC_0 3 +#define MASTER_BIMC_SNOC_1 4 +#define MASTER_A0NOC_SNOC 5 +#define MASTER_A1NOC_SNOC 6 +#define MASTER_A2NOC_SNOC 7 +#define MASTER_QDSS_ETR 8 +#define SLAVE_A0NOC_SNOC 9 +#define SLAVE_A1NOC_SNOC 10 +#define SLAVE_A2NOC_SNOC 11 +#define SLAVE_HMSS 12 +#define SLAVE_LPASS 13 +#define SLAVE_USB3 14 +#define SLAVE_SNOC_BIMC 15 +#define SLAVE_SNOC_CNOC 16 +#define SLAVE_IMEM 17 +#define SLAVE_PIMEM 18 +#define SLAVE_SNOC_VMEM 19 +#define SLAVE_SNOC_PNOC 20 +#define SLAVE_QDSS_STM 21 +#define SLAVE_PCIE_0 22 +#define SLAVE_PCIE_1 23 +#define SLAVE_PCIE_2 24 +#define SLAVE_SERVICE_SNOC 25 + +#endif -- cgit v1.2.3 From 325e0d0aa683a96b9d9cd5802be524d4da5e2dd2 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 18 Oct 2021 18:16:01 -0500 Subject: devlink: Add 'enable_iwarp' generic device param Add a new device generic parameter to enable and disable iWARP functionality on a multi-protocol RDMA device. Signed-off-by: Shiraz Saleem Tested-by: Leszek Kaliszczuk Signed-off-by: Tony Nguyen --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index aab3d007c577..e3c88fabd700 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -485,6 +485,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -534,6 +535,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet" #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp" +#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From e523af4ee56090fbdd9cf474752448d35930bcd4 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 18 Oct 2021 18:16:02 -0500 Subject: net/ice: Add support for enable_iwarp and enable_roce devlink param Allow support for 'enable_iwarp' and 'enable_roce' devlink params to turn on/off iWARP or RoCE protocol support for E800 devices. For example, a user can turn on iWARP functionality with, devlink dev param set pci/0000:07:00.0 name enable_iwarp value true cmode runtime This add an iWARP auxiliary rdma device, ice.iwarp.<>, under this PF. A user request to enable both iWARP and RoCE under the same PF is rejected since this device does not support both protocols simultaneously on the same port. Signed-off-by: Shiraz Saleem Tested-by: Leszek Kaliszczuk Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h index e32f6712aee0..1289593411d3 100644 --- a/include/linux/net/intel/iidc.h +++ b/include/linux/net/intel/iidc.h @@ -26,6 +26,11 @@ enum iidc_reset_type { IIDC_GLOBR, }; +enum iidc_rdma_protocol { + IIDC_RDMA_PROTOCOL_IWARP = BIT(0), + IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1), +}; + #define IIDC_MAX_USER_PRIORITY 8 /* Struct to hold per RDMA Qset info */ @@ -70,8 +75,6 @@ int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type); int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); -#define IIDC_RDMA_ROCE_NAME "roce" - /* Structure representing auxiliary driver tailored information about the core * PCI dev, each auxiliary driver using the IIDC interface will have an * instance of this struct dedicated to it. -- cgit v1.2.3 From 6326948f940dc3f77066d5cdc44ba6afe67830c0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 29 Sep 2021 11:01:21 -0400 Subject: lsm: security_task_getsecid_subj() -> security_current_getsecid_subj() The security_task_getsecid_subj() LSM hook invites misuse by allowing callers to specify a task even though the hook is only safe when the current task is referenced. Fix this by removing the task_struct argument to the hook, requiring LSM implementations to use the current task. While we are changing the hook declaration we also rename the function to security_current_getsecid_subj() in an effort to reinforce that the hook captures the subjective credentials of the current task and not an arbitrary task on the system. Reviewed-by: Serge Hallyn Reviewed-by: Casey Schaufler Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 3 +-- include/linux/lsm_hooks.h | 8 +++----- include/linux/security.h | 4 ++-- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index df8de62f4710..ae2228f0711d 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -206,8 +206,7 @@ LSM_HOOK(int, 0, task_fix_setgid, struct cred *new, const struct cred * old, LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) -LSM_HOOK(void, LSM_RET_VOID, task_getsecid_subj, - struct task_struct *p, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, current_getsecid_subj, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj, struct task_struct *p, u32 *secid) LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index d45b6f6e27fd..52c1990644b9 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -719,11 +719,9 @@ * @p. * @p contains the task_struct for the process. * Return 0 if permission is granted. - * @task_getsecid_subj: - * Retrieve the subjective security identifier of the task_struct in @p - * and return it in @secid. Special care must be taken to ensure that @p - * is the either the "current" task, or the caller has exclusive access - * to @p. + * @current_getsecid_subj: + * Retrieve the subjective security identifier of the current task and + * return it in @secid. * In case of failure, @secid will be set to zero. * @task_getsecid_obj: * Retrieve the objective security identifier of the task_struct in @p diff --git a/include/linux/security.h b/include/linux/security.h index bbf44a466832..bb301963e333 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -418,7 +418,7 @@ int security_task_fix_setgid(struct cred *new, const struct cred *old, int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); int security_task_getsid(struct task_struct *p); -void security_task_getsecid_subj(struct task_struct *p, u32 *secid); +void security_current_getsecid_subj(u32 *secid); void security_task_getsecid_obj(struct task_struct *p, u32 *secid); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); @@ -1119,7 +1119,7 @@ static inline int security_task_getsid(struct task_struct *p) return 0; } -static inline void security_task_getsecid_subj(struct task_struct *p, u32 *secid) +static inline void security_current_getsecid_subj(u32 *secid) { *secid = 0; } -- cgit v1.2.3 From ea8a163e02d6925773129e2dd86e419e491b791d Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 Nov 2021 11:08:17 +0100 Subject: dt-bindings: phy: Add constants for lan966x serdes Lan966x has: 2 integrated PHYs, 3 SerDes and 2 RGMII interfaces. Which requires to be muxed based on the HW representation. So add constants for each interface to be able to distinguish them. Reviewed-by: Rob Herring Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20211116100818.1615762-3-horatiu.vultur@microchip.com Signed-off-by: Vinod Koul --- include/dt-bindings/phy/phy-lan966x-serdes.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/dt-bindings/phy/phy-lan966x-serdes.h (limited to 'include') diff --git a/include/dt-bindings/phy/phy-lan966x-serdes.h b/include/dt-bindings/phy/phy-lan966x-serdes.h new file mode 100644 index 000000000000..4330269a901e --- /dev/null +++ b/include/dt-bindings/phy/phy-lan966x-serdes.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ + +#ifndef __PHY_LAN966X_SERDES_H__ +#define __PHY_LAN966X_SERDES_H__ + +#define CU(x) (x) +#define CU_MAX CU(2) +#define SERDES6G(x) (CU_MAX + 1 + (x)) +#define SERDES6G_MAX SERDES6G(3) +#define RGMII(x) (SERDES6G_MAX + 1 + (x)) +#define RGMII_MAX RGMII(2) +#define SERDES_MAX (RGMII_MAX + 1) + +#endif -- cgit v1.2.3 From ec3bb890817e4398f2d46e12e2e205495b116be9 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 22 Nov 2021 11:33:13 +0100 Subject: xfrm: fix dflt policy check when there is no policy configured When there is no policy configured on the system, the default policy is checked in xfrm_route_forward. However, it was done with the wrong direction (XFRM_POLICY_FWD instead of XFRM_POLICY_OUT). The default policy for XFRM_POLICY_FWD was checked just before, with a call to xfrm[46]_policy_check(). CC: stable@vger.kernel.org Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2308210793a0..55e574511af5 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1162,7 +1162,7 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); - if (xfrm_default_allow(net, XFRM_POLICY_FWD)) + if (xfrm_default_allow(net, XFRM_POLICY_OUT)) return !net->xfrm.policy_count[XFRM_POLICY_OUT] || (skb_dst(skb)->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); -- cgit v1.2.3 From 3c542cfa8266e3364938d055b3d548b7bed7f08e Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Mon, 22 Nov 2021 23:14:20 +0200 Subject: drm/i915/dg2: Tile 4 plane format support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TileF(Tile4 in bspec) format is 4K tile organized into 64B subtiles with same basic shape as for legacy TileY which will be supported by Display13. v2: - Fixed wrong case condition(Jani Nikula) - Increased I915_FORMAT_MOD_F_TILED up to 12(Imre Deak) v3: - s/I915_TILING_F/TILING_4/g - s/I915_FORMAT_MOD_F_TILED/I915_FORMAT_MOD_4_TILED/g - Removed unneeded fencing code v4: - Rebased, fixed merge conflict with new table-oriented format modifier checking(Stan) - Replaced the rest of "Tile F" mentions to "Tile 4"(Stan) v5: - Still had to remove some Tile F mentionings - Moved has_4tile from adlp to DG2(Ramalingam C) - Check specifically for DG2, but not the Display13(Imre) v6: - Moved Tile4 associating struct for modifier/display to the beginning(Imre Deak) - Removed unneeded case I915_FORMAT_MOD_4_TILED modifier checks(Imre Deak) - Fixed I915_FORMAT_MOD_4_TILED to be 9 instead of 12 (Imre Deak) v7: - Fixed display_ver to { 13, 13 }(Imre Deak) - Removed redundant newline(Imre Deak) Reviewed-by: Imre Deak Cc: Imre Deak Cc: Matt Roper Cc: Maarten Lankhorst Signed-off-by: Stanislav Lisovskiy Signed-off-by: Matt Roper Signed-off-by: Juha-Pekka Heikkilä Link: https://patchwork.freedesktop.org/patch/msgid/20211122211420.31584-1-stanislav.lisovskiy@intel.com --- include/uapi/drm/drm_fourcc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 7f652c96845b..41184a94935d 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -564,6 +564,14 @@ extern "C" { * pitch is required to be a multiple of 4 tile widths. */ #define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8) +/* + * Intel F-tiling(aka Tile4) layout + * + * This is a tiled layout using 4Kb tiles in row-major layout. + * Within the tile pixels are laid out in 64 byte units / sub-tiles in OWORD + * (16 bytes) chunks column-major.. + */ +#define I915_FORMAT_MOD_4_TILED fourcc_mod_code(INTEL, 9) /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks -- cgit v1.2.3 From 91389c390521a02ecfb91270f5b9d7fae4312ae5 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Thu, 18 Nov 2021 21:33:37 -0600 Subject: clk: sunxi-ng: Allow the CCU core to be built as a module Like the individual CCU drivers, it can be beneficial for memory consumption of cross-platform configurations to only load the CCU core on the relevant platform. For example, a generic arm64 kernel sees the following improvement when building the CCU core and drivers as modules: before: text data bss dec hex filename 13882360 5251670 360800 19494830 12977ae vmlinux after: text data bss dec hex filename 13734787 5086442 360800 19182029 124b1cd vmlinux So the result is a 390KB total reduction in kernel image size. The one early clock provider (sun5i) requires the core to be built in. Now that loading the MMC driver will trigger loading the CCU core, the MMC timing mode functions do not need a compile-time fallback. Signed-off-by: Samuel Holland Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20211119033338.25486-5-samuel@sholland.org --- include/linux/clk/sunxi-ng.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/clk/sunxi-ng.h b/include/linux/clk/sunxi-ng.h index 3cd14acde0a1..cf32123b39f5 100644 --- a/include/linux/clk/sunxi-ng.h +++ b/include/linux/clk/sunxi-ng.h @@ -6,22 +6,7 @@ #ifndef _LINUX_CLK_SUNXI_NG_H_ #define _LINUX_CLK_SUNXI_NG_H_ -#include - -#ifdef CONFIG_SUNXI_CCU int sunxi_ccu_set_mmc_timing_mode(struct clk *clk, bool new_mode); int sunxi_ccu_get_mmc_timing_mode(struct clk *clk); -#else -static inline int sunxi_ccu_set_mmc_timing_mode(struct clk *clk, - bool new_mode) -{ - return -ENOTSUPP; -} - -static inline int sunxi_ccu_get_mmc_timing_mode(struct clk *clk) -{ - return -ENOTSUPP; -} -#endif #endif -- cgit v1.2.3 From c962f10f3931e8409f67dc52725df13e23c67d2d Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Thu, 18 Nov 2021 22:35:39 -0600 Subject: dt-bindings: clk: Add compatibles for D1 CCUs The D1 has a CCU and a R_CCU (PRCM CCU) like most other sunxi SoCs, with 3 and 4 clock inputs, respectively. Add the compatibles and bindings. Signed-off-by: Samuel Holland Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20211119043545.4010-2-samuel@sholland.org --- include/dt-bindings/clock/sun20i-d1-ccu.h | 156 ++++++++++++++++++++++++++++ include/dt-bindings/clock/sun20i-d1-r-ccu.h | 19 ++++ include/dt-bindings/reset/sun20i-d1-ccu.h | 77 ++++++++++++++ include/dt-bindings/reset/sun20i-d1-r-ccu.h | 16 +++ 4 files changed, 268 insertions(+) create mode 100644 include/dt-bindings/clock/sun20i-d1-ccu.h create mode 100644 include/dt-bindings/clock/sun20i-d1-r-ccu.h create mode 100644 include/dt-bindings/reset/sun20i-d1-ccu.h create mode 100644 include/dt-bindings/reset/sun20i-d1-r-ccu.h (limited to 'include') diff --git a/include/dt-bindings/clock/sun20i-d1-ccu.h b/include/dt-bindings/clock/sun20i-d1-ccu.h new file mode 100644 index 000000000000..e3ac53315e1a --- /dev/null +++ b/include/dt-bindings/clock/sun20i-d1-ccu.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */ +/* + * Copyright (C) 2020 huangzhenwei@allwinnertech.com + * Copyright (C) 2021 Samuel Holland + */ + +#ifndef _DT_BINDINGS_CLK_SUN20I_D1_CCU_H_ +#define _DT_BINDINGS_CLK_SUN20I_D1_CCU_H_ + +#define CLK_PLL_CPUX 0 +#define CLK_PLL_DDR0 1 +#define CLK_PLL_PERIPH0_4X 2 +#define CLK_PLL_PERIPH0_2X 3 +#define CLK_PLL_PERIPH0_800M 4 +#define CLK_PLL_PERIPH0 5 +#define CLK_PLL_PERIPH0_DIV3 6 +#define CLK_PLL_VIDEO0_4X 7 +#define CLK_PLL_VIDEO0_2X 8 +#define CLK_PLL_VIDEO0 9 +#define CLK_PLL_VIDEO1_4X 10 +#define CLK_PLL_VIDEO1_2X 11 +#define CLK_PLL_VIDEO1 12 +#define CLK_PLL_VE 13 +#define CLK_PLL_AUDIO0_4X 14 +#define CLK_PLL_AUDIO0_2X 15 +#define CLK_PLL_AUDIO0 16 +#define CLK_PLL_AUDIO1 17 +#define CLK_PLL_AUDIO1_DIV2 18 +#define CLK_PLL_AUDIO1_DIV5 19 +#define CLK_CPUX 20 +#define CLK_CPUX_AXI 21 +#define CLK_CPUX_APB 22 +#define CLK_PSI_AHB 23 +#define CLK_APB0 24 +#define CLK_APB1 25 +#define CLK_MBUS 26 +#define CLK_DE 27 +#define CLK_BUS_DE 28 +#define CLK_DI 29 +#define CLK_BUS_DI 30 +#define CLK_G2D 31 +#define CLK_BUS_G2D 32 +#define CLK_CE 33 +#define CLK_BUS_CE 34 +#define CLK_VE 35 +#define CLK_BUS_VE 36 +#define CLK_BUS_DMA 37 +#define CLK_BUS_MSGBOX0 38 +#define CLK_BUS_MSGBOX1 39 +#define CLK_BUS_MSGBOX2 40 +#define CLK_BUS_SPINLOCK 41 +#define CLK_BUS_HSTIMER 42 +#define CLK_AVS 43 +#define CLK_BUS_DBG 44 +#define CLK_BUS_PWM 45 +#define CLK_BUS_IOMMU 46 +#define CLK_DRAM 47 +#define CLK_MBUS_DMA 48 +#define CLK_MBUS_VE 49 +#define CLK_MBUS_CE 50 +#define CLK_MBUS_TVIN 51 +#define CLK_MBUS_CSI 52 +#define CLK_MBUS_G2D 53 +#define CLK_MBUS_RISCV 54 +#define CLK_BUS_DRAM 55 +#define CLK_MMC0 56 +#define CLK_MMC1 57 +#define CLK_MMC2 58 +#define CLK_BUS_MMC0 59 +#define CLK_BUS_MMC1 60 +#define CLK_BUS_MMC2 61 +#define CLK_BUS_UART0 62 +#define CLK_BUS_UART1 63 +#define CLK_BUS_UART2 64 +#define CLK_BUS_UART3 65 +#define CLK_BUS_UART4 66 +#define CLK_BUS_UART5 67 +#define CLK_BUS_I2C0 68 +#define CLK_BUS_I2C1 69 +#define CLK_BUS_I2C2 70 +#define CLK_BUS_I2C3 71 +#define CLK_SPI0 72 +#define CLK_SPI1 73 +#define CLK_BUS_SPI0 74 +#define CLK_BUS_SPI1 75 +#define CLK_EMAC_25M 76 +#define CLK_BUS_EMAC 77 +#define CLK_IR_TX 78 +#define CLK_BUS_IR_TX 79 +#define CLK_BUS_GPADC 80 +#define CLK_BUS_THS 81 +#define CLK_I2S0 82 +#define CLK_I2S1 83 +#define CLK_I2S2 84 +#define CLK_I2S2_ASRC 85 +#define CLK_BUS_I2S0 86 +#define CLK_BUS_I2S1 87 +#define CLK_BUS_I2S2 88 +#define CLK_SPDIF_TX 89 +#define CLK_SPDIF_RX 90 +#define CLK_BUS_SPDIF 91 +#define CLK_DMIC 92 +#define CLK_BUS_DMIC 93 +#define CLK_AUDIO_DAC 94 +#define CLK_AUDIO_ADC 95 +#define CLK_BUS_AUDIO 96 +#define CLK_USB_OHCI0 97 +#define CLK_USB_OHCI1 98 +#define CLK_BUS_OHCI0 99 +#define CLK_BUS_OHCI1 100 +#define CLK_BUS_EHCI0 101 +#define CLK_BUS_EHCI1 102 +#define CLK_BUS_OTG 103 +#define CLK_BUS_LRADC 104 +#define CLK_BUS_DPSS_TOP 105 +#define CLK_HDMI_24M 106 +#define CLK_HDMI_CEC_32K 107 +#define CLK_HDMI_CEC 108 +#define CLK_BUS_HDMI 109 +#define CLK_MIPI_DSI 110 +#define CLK_BUS_MIPI_DSI 111 +#define CLK_TCON_LCD0 112 +#define CLK_BUS_TCON_LCD0 113 +#define CLK_TCON_TV 114 +#define CLK_BUS_TCON_TV 115 +#define CLK_TVE 116 +#define CLK_BUS_TVE_TOP 117 +#define CLK_BUS_TVE 118 +#define CLK_TVD 119 +#define CLK_BUS_TVD_TOP 120 +#define CLK_BUS_TVD 121 +#define CLK_LEDC 122 +#define CLK_BUS_LEDC 123 +#define CLK_CSI_TOP 124 +#define CLK_CSI_MCLK 125 +#define CLK_BUS_CSI 126 +#define CLK_TPADC 127 +#define CLK_BUS_TPADC 128 +#define CLK_BUS_TZMA 129 +#define CLK_DSP 130 +#define CLK_BUS_DSP_CFG 131 +#define CLK_RISCV 132 +#define CLK_RISCV_AXI 133 +#define CLK_BUS_RISCV_CFG 134 +#define CLK_FANOUT_24M 135 +#define CLK_FANOUT_12M 136 +#define CLK_FANOUT_16M 137 +#define CLK_FANOUT_25M 138 +#define CLK_FANOUT_32K 139 +#define CLK_FANOUT_27M 140 +#define CLK_FANOUT_PCLK 141 +#define CLK_FANOUT0 142 +#define CLK_FANOUT1 143 +#define CLK_FANOUT2 144 + +#endif /* _DT_BINDINGS_CLK_SUN20I_D1_CCU_H_ */ diff --git a/include/dt-bindings/clock/sun20i-d1-r-ccu.h b/include/dt-bindings/clock/sun20i-d1-r-ccu.h new file mode 100644 index 000000000000..4c2697fd32b0 --- /dev/null +++ b/include/dt-bindings/clock/sun20i-d1-r-ccu.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */ +/* + * Copyright (C) 2021 Samuel Holland + */ + +#ifndef _DT_BINDINGS_CLK_SUN20I_D1_R_CCU_H_ +#define _DT_BINDINGS_CLK_SUN20I_D1_R_CCU_H_ + +#define CLK_R_AHB 0 + +#define CLK_BUS_R_TIMER 2 +#define CLK_BUS_R_TWD 3 +#define CLK_BUS_R_PPU 4 +#define CLK_R_IR_RX 5 +#define CLK_BUS_R_IR_RX 6 +#define CLK_BUS_R_RTC 7 +#define CLK_BUS_R_CPUCFG 8 + +#endif /* _DT_BINDINGS_CLK_SUN20I_D1_R_CCU_H_ */ diff --git a/include/dt-bindings/reset/sun20i-d1-ccu.h b/include/dt-bindings/reset/sun20i-d1-ccu.h new file mode 100644 index 000000000000..de9ff5203239 --- /dev/null +++ b/include/dt-bindings/reset/sun20i-d1-ccu.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */ +/* + * Copyright (c) 2020 huangzhenwei@allwinnertech.com + * Copyright (C) 2021 Samuel Holland + */ + +#ifndef _DT_BINDINGS_RST_SUN20I_D1_CCU_H_ +#define _DT_BINDINGS_RST_SUN20I_D1_CCU_H_ + +#define RST_MBUS 0 +#define RST_BUS_DE 1 +#define RST_BUS_DI 2 +#define RST_BUS_G2D 3 +#define RST_BUS_CE 4 +#define RST_BUS_VE 5 +#define RST_BUS_DMA 6 +#define RST_BUS_MSGBOX0 7 +#define RST_BUS_MSGBOX1 8 +#define RST_BUS_MSGBOX2 9 +#define RST_BUS_SPINLOCK 10 +#define RST_BUS_HSTIMER 11 +#define RST_BUS_DBG 12 +#define RST_BUS_PWM 13 +#define RST_BUS_DRAM 14 +#define RST_BUS_MMC0 15 +#define RST_BUS_MMC1 16 +#define RST_BUS_MMC2 17 +#define RST_BUS_UART0 18 +#define RST_BUS_UART1 19 +#define RST_BUS_UART2 20 +#define RST_BUS_UART3 21 +#define RST_BUS_UART4 22 +#define RST_BUS_UART5 23 +#define RST_BUS_I2C0 24 +#define RST_BUS_I2C1 25 +#define RST_BUS_I2C2 26 +#define RST_BUS_I2C3 27 +#define RST_BUS_SPI0 28 +#define RST_BUS_SPI1 29 +#define RST_BUS_EMAC 30 +#define RST_BUS_IR_TX 31 +#define RST_BUS_GPADC 32 +#define RST_BUS_THS 33 +#define RST_BUS_I2S0 34 +#define RST_BUS_I2S1 35 +#define RST_BUS_I2S2 36 +#define RST_BUS_SPDIF 37 +#define RST_BUS_DMIC 38 +#define RST_BUS_AUDIO 39 +#define RST_USB_PHY0 40 +#define RST_USB_PHY1 41 +#define RST_BUS_OHCI0 42 +#define RST_BUS_OHCI1 43 +#define RST_BUS_EHCI0 44 +#define RST_BUS_EHCI1 45 +#define RST_BUS_OTG 46 +#define RST_BUS_LRADC 47 +#define RST_BUS_DPSS_TOP 48 +#define RST_BUS_HDMI_SUB 49 +#define RST_BUS_HDMI_MAIN 50 +#define RST_BUS_MIPI_DSI 51 +#define RST_BUS_TCON_LCD0 52 +#define RST_BUS_TCON_TV 53 +#define RST_BUS_LVDS0 54 +#define RST_BUS_TVE 55 +#define RST_BUS_TVE_TOP 56 +#define RST_BUS_TVD 57 +#define RST_BUS_TVD_TOP 58 +#define RST_BUS_LEDC 59 +#define RST_BUS_CSI 60 +#define RST_BUS_TPADC 61 +#define RST_DSP 62 +#define RST_BUS_DSP_CFG 63 +#define RST_BUS_DSP_DBG 64 +#define RST_BUS_RISCV_CFG 65 + +#endif /* _DT_BINDINGS_RST_SUN20I_D1_CCU_H_ */ diff --git a/include/dt-bindings/reset/sun20i-d1-r-ccu.h b/include/dt-bindings/reset/sun20i-d1-r-ccu.h new file mode 100644 index 000000000000..d93d6423d283 --- /dev/null +++ b/include/dt-bindings/reset/sun20i-d1-r-ccu.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */ +/* + * Copyright (C) 2021 Samuel Holland + */ + +#ifndef _DT_BINDINGS_RST_SUN20I_D1_R_CCU_H_ +#define _DT_BINDINGS_RST_SUN20I_D1_R_CCU_H_ + +#define RST_BUS_R_TIMER 0 +#define RST_BUS_R_TWD 1 +#define RST_BUS_R_PPU 2 +#define RST_BUS_R_IR_RX 3 +#define RST_BUS_R_RTC 4 +#define RST_BUS_R_CPUCFG 5 + +#endif /* _DT_BINDINGS_RST_SUN20I_D1_R_CCU_H_ */ -- cgit v1.2.3 From c214f124161d446b340597e7c968e0a2dc149142 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 9 Nov 2021 19:57:10 +0000 Subject: arch_topology: Introduce thermal pressure update function The thermal pressure is a mechanism which is used for providing information about reduced CPU performance to the scheduler. Usually code has to convert the value from frequency units into capacity units, which are understandable by the scheduler. Create a common conversion code which can be just used via a handy API. Internally, the topology_update_thermal_pressure() operates on frequency in MHz and max CPU frequency is taken from 'freq_factor' (per-cpu). Signed-off-by: Lukasz Luba Reviewed-by: Thara Gopinath Signed-off-by: Viresh Kumar --- include/linux/arch_topology.h | 3 +++ include/linux/sched/topology.h | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index b97cea83b25e..ace1e5dcf773 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -59,6 +59,9 @@ static inline unsigned long topology_get_thermal_pressure(int cpu) void topology_set_thermal_pressure(const struct cpumask *cpus, unsigned long th_pressure); +void topology_update_thermal_pressure(const struct cpumask *cpus, + unsigned long capped_freq); + struct cpu_topology { int thread_id; int core_id; diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index c07bfa2d80f2..6e89a8e43aa7 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -273,6 +273,13 @@ void arch_set_thermal_pressure(const struct cpumask *cpus, { } #endif +#ifndef arch_update_thermal_pressure +static __always_inline +void arch_update_thermal_pressure(const struct cpumask *cpus, + unsigned long capped_frequency) +{ } +#endif + static inline int task_node(const struct task_struct *p) { return cpu_to_node(task_cpu(p)); -- cgit v1.2.3 From 7e97b3dc2556743dd02612c92a8de7026e8d7dc9 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 9 Nov 2021 19:57:14 +0000 Subject: arch_topology: Remove unused topology_set_thermal_pressure() and related There is no need of this function (and related) since code has been converted to use the new arch_update_thermal_pressure() API. The old code can be removed. Signed-off-by: Lukasz Luba Signed-off-by: Viresh Kumar --- include/linux/arch_topology.h | 3 --- include/linux/sched/topology.h | 7 ------- 2 files changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index ace1e5dcf773..cce6136b300a 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -56,9 +56,6 @@ static inline unsigned long topology_get_thermal_pressure(int cpu) return per_cpu(thermal_pressure, cpu); } -void topology_set_thermal_pressure(const struct cpumask *cpus, - unsigned long th_pressure); - void topology_update_thermal_pressure(const struct cpumask *cpus, unsigned long capped_freq); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 6e89a8e43aa7..8054641c0a7b 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -266,13 +266,6 @@ unsigned long arch_scale_thermal_pressure(int cpu) } #endif -#ifndef arch_set_thermal_pressure -static __always_inline -void arch_set_thermal_pressure(const struct cpumask *cpus, - unsigned long th_pressure) -{ } -#endif - #ifndef arch_update_thermal_pressure static __always_inline void arch_update_thermal_pressure(const struct cpumask *cpus, -- cgit v1.2.3 From 71b597ef5d46a326fb0d5cbfc1c6ff1d73cdc7f9 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 17 Nov 2021 21:18:36 -0600 Subject: dt-bindings: clock: sunxi: Export CLK_DRAM for devfreq The MBUS node needs to reference the CLK_DRAM clock, as the MBUS hardware implements memory dynamic frequency scaling using this clock. Export this clock for SoCs which will be getting a devfreq driver. Acked-by: Rob Herring Reviewed-by: Chanwoo Choi Signed-off-by: Samuel Holland Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20211118031841.42315-2-samuel@sholland.org --- include/dt-bindings/clock/sun50i-a64-ccu.h | 2 +- include/dt-bindings/clock/sun8i-h3-ccu.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/sun50i-a64-ccu.h b/include/dt-bindings/clock/sun50i-a64-ccu.h index 318eb15c414c..175892189e9d 100644 --- a/include/dt-bindings/clock/sun50i-a64-ccu.h +++ b/include/dt-bindings/clock/sun50i-a64-ccu.h @@ -113,7 +113,7 @@ #define CLK_USB_OHCI0 91 #define CLK_USB_OHCI1 93 - +#define CLK_DRAM 94 #define CLK_DRAM_VE 95 #define CLK_DRAM_CSI 96 #define CLK_DRAM_DEINTERLACE 97 diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h index 30d2d15373a2..5d4ada2c22e6 100644 --- a/include/dt-bindings/clock/sun8i-h3-ccu.h +++ b/include/dt-bindings/clock/sun8i-h3-ccu.h @@ -126,7 +126,7 @@ #define CLK_USB_OHCI1 93 #define CLK_USB_OHCI2 94 #define CLK_USB_OHCI3 95 - +#define CLK_DRAM 96 #define CLK_DRAM_VE 97 #define CLK_DRAM_CSI 98 #define CLK_DRAM_DEINTERLACE 99 -- cgit v1.2.3 From a0c2ccd9b5ad0a9e838158404e041b5a8ff762dd Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 23 Nov 2021 15:50:46 +0800 Subject: mctp: Add MCTP-over-serial transport binding This change adds a MCTP Serial transport binding, as defined by DMTF specificiation DSP0253 - "MCTP Serial Transport Binding". This is implemented as a new serial line discipline, and can be attached to arbitrary tty devices. From the Kconfig description: This driver provides an MCTP-over-serial interface, through a serial line-discipline, as defined by DMTF specification "DSP0253 - MCTP Serial Transport Binding". By attaching the ldisc to a serial device, we get a new net device to transport MCTP packets. This allows communication with external MCTP endpoints which use serial as their transport. It can also be used as an easy way to provide MCTP connectivity between virtual machines, by forwarding data between simple virtual serial devices. Say y here if you need to connect to MCTP endpoints over serial. To compile as a module, use m; the module will be called mctp-serial. Once the N_MCTP line discipline is set [using ioctl(TCIOSETD)], we get a new netdev suitable for MCTP communication. The 'mctp' utility[1] provides a simple wrapper for this ioctl, using 'link serial ': # mctp link serial /dev/ttyS0 & # mctp link dev lo index 1 address 0x00:00:00:00:00:00 net 1 mtu 65536 up dev mctpserial0 index 5 address 0x(no-addr) net 1 mtu 68 down [1]: https://github.com/CodeConstruct/mctp Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index 376cccf397be..a58deb3061eb 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -38,5 +38,6 @@ #define N_NCI 25 /* NFC NCI UART */ #define N_SPEAKUP 26 /* Speakup communication with synths */ #define N_NULL 27 /* Null ldisc used for error handling */ +#define N_MCTP 28 /* MCTP-over-serial */ #endif /* _UAPI_LINUX_TTY_H */ -- cgit v1.2.3 From 1e84dc6b7bbfc4d1dd846decece4611b7e035772 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Tue, 23 Nov 2021 10:54:30 +0800 Subject: neigh: introduce neigh_confirm() helper function Add neigh_confirm() for the confirmed member in struct neighbour, it can be called as an independent unit by other functions. Signed-off-by: Yajun Deng Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/arp.h | 8 +------- include/net/ndisc.h | 16 ++-------------- include/net/neighbour.h | 11 +++++++++++ include/net/sock.h | 5 +---- 4 files changed, 15 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/arp.h b/include/net/arp.h index 4950191f6b2b..031374ac2f22 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -53,13 +53,7 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 04341d86585d..53cb8de0e589 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -411,13 +411,7 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } @@ -428,13 +422,7 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref_stub(dev, pkey); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 38a0c1d24570..55af812c3ed5 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -321,6 +321,17 @@ static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl, return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev); } +static inline void neigh_confirm(struct neighbour *n) +{ + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); + } +} + void neigh_table_init(int index, struct neigh_table *tbl); int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, diff --git a/include/net/sock.h b/include/net/sock.h index a79fc772324e..e7c231373875 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2135,13 +2135,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) { if (skb_get_dst_pending_confirm(skb)) { struct sock *sk = skb->sk; - unsigned long now = jiffies; - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); if (sk && READ_ONCE(sk->sk_dst_pending_confirm)) WRITE_ONCE(sk->sk_dst_pending_confirm, 0); + neigh_confirm(n); } } -- cgit v1.2.3 From cff6f593251cdf5398dc3c57f7032b8e9dcb633e Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 23 Nov 2021 12:36:47 +0200 Subject: regulator: rohm-generic: iniline stub function The function rohm_regulator_set_voltage_sel_restricted() has a stub implementation. Linux-next testing spot following: include/linux/mfd/rohm-generic.h:93:12: error: 'rohm_regulator_set_voltage_sel_restricted' defined but not used Fix this by inlining the stub. Fixes: 8b6e88555971 ("regulator: rohm-regulator: add helper for restricted voltage setting") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/YZzEP3S7U15bTDAI@fedora Signed-off-by: Mark Brown --- include/linux/mfd/rohm-generic.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 35c5866f48b7..080d60adcd5f 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -90,7 +90,8 @@ static inline int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dv { return 0; } -static int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev, + +static inline int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev, unsigned int sel) { return 0; -- cgit v1.2.3 From 2106efda785b55a8957efed9a52dfa28ee0d7280 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 22 Nov 2021 17:24:47 -0800 Subject: net: remove .ndo_change_proto_down .ndo_change_proto_down was added seemingly to enable out-of-tree implementations. Over 2.5yrs later we still have no real users upstream. Hardwire the generic implementation for now, we can revert once real users materialize. (rocker is a test vehicle, not a user.) We need to drop the optimization on the sysfs side, because unlike ndos priv_flags will be changed at runtime, so we'd need READ_ONCE/WRITE_ONCE everywhere.. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index df049864661d..db3bff1ae7fd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1297,11 +1297,6 @@ struct netdev_net_notifier { * TX queue. * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. - * void (*ndo_change_proto_down)(struct net_device *dev, - * bool proto_down); - * This function is used to pass protocol port error state information - * to the switch driver. The switch driver can react to the proto_down - * by doing a phys down on the associated switch port. * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); * This function is used to get egress tunnel information for given skb. * This is useful for retrieving outer tunnel header parameters while @@ -1542,8 +1537,6 @@ struct net_device_ops { int queue_index, u32 maxrate); int (*ndo_get_iflink)(const struct net_device *dev); - int (*ndo_change_proto_down)(struct net_device *dev, - bool proto_down); int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, @@ -1612,6 +1605,7 @@ struct net_device_ops { * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) + * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1646,6 +1640,7 @@ enum netdev_priv_flags { IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, IFF_TX_SKB_NO_LINEAR = 1<<31, + IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1982,7 +1977,7 @@ struct net_device { /* Read-mostly cache-line for fast-path access */ unsigned int flags; - unsigned int priv_flags; + unsigned long long priv_flags; const struct net_device_ops *netdev_ops; int ifindex; unsigned short gflags; @@ -3735,7 +3730,6 @@ int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_proto_down_generic(struct net_device *dev, bool proto_down); void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, u32 value); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); -- cgit v1.2.3 From 985e9ece1e55a94da842f6c1f9ff84d587b26267 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 17 Nov 2021 20:07:35 +0200 Subject: ACPI: Make acpi_node_get_parent() local acpi_node_get_parent() isn't used outside drivers/acpi/property.c. Make it local. Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 668d007f0917..b28f8790192a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1182,7 +1182,6 @@ int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child); -struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode); struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, @@ -1287,12 +1286,6 @@ acpi_get_next_subnode(const struct fwnode_handle *fwnode, return NULL; } -static inline struct fwnode_handle * -acpi_node_get_parent(const struct fwnode_handle *fwnode) -{ - return NULL; -} - static inline struct fwnode_handle * acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) -- cgit v1.2.3 From 37a72b08a3e1eb28053214dd8211eb09c2fd3187 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 22 Oct 2021 08:47:56 +0200 Subject: xen: add "not_essential" flag to struct xenbus_driver When booting the xenbus driver will wait for PV devices to have connected to their backends before continuing. The timeout is different between essential and non-essential devices. Non-essential devices are identified by their nodenames directly in the xenbus driver, which requires to update this list in case a new device type being non-essential is added (this was missed for several types in the past). In order to avoid this problem, add a "not_essential" flag to struct xenbus_driver which can be set to "true" by the respective frontend. Set this flag for the frontends currently regarded to be not essential (vkbs and vfb) and use it for testing in the xenbus driver. Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20211022064800.14978-2-jgross@suse.com Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- include/xen/xenbus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index b94074c82772..b13eb86395e0 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -112,6 +112,7 @@ struct xenbus_driver { const char *name; /* defaults to ids[0].devicetype */ const struct xenbus_device_id *ids; bool allow_rebind; /* avoid setting xenstore closed during remove */ + bool not_essential; /* is not mandatory for boot progress */ int (*probe)(struct xenbus_device *dev, const struct xenbus_device_id *id); void (*otherend_changed)(struct xenbus_device *dev, -- cgit v1.2.3 From c6d5f1933085f9a92ed5c256a859ab31c7a35f88 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Mon, 22 Nov 2021 12:19:31 +0100 Subject: net: stmmac: Calculate CDC error only once The clock domain crossing error (CDC) is calculated at every fetch of Tx or Rx timestamps. It includes a division. Especially on arm32 based systems it is expensive. It also requires two conditionals in the hotpath. Add a compensation value cache to struct plat_stmmacenet_data and subtract it unconditionally in the RX/TX functions which spares the conditionals. The value is initialized to 0 and if supported calculated in the PTP initialization code. Suggested-by: Thomas Gleixner Signed-off-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20211122111931.135135-1-kurt@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a6f03b36fc4f..89b8e208cd7b 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -241,6 +241,7 @@ struct plat_stmmacenet_data { unsigned int clk_ref_rate; unsigned int mult_fact_100ns; s32 ptp_max_adj; + u32 cdc_error_adj; struct reset_control *stmmac_rst; struct reset_control *stmmac_ahb_rst; struct stmmac_axi *axi; -- cgit v1.2.3 From e7049395b1c3085d12b5ba16d058c65598368853 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 22 Nov 2021 19:16:21 +0900 Subject: dccp/tcp: Remove an unused argument in inet_csk_listen_start(). The commit 1295e2cf3065 ("inet: minor optimization for backlog setting in listen(2)") added change so that sk_max_ack_backlog is initialised earlier in inet_dccp_listen() and inet_listen(). Since then, we no longer use backlog in inet_csk_listen_start(), so let's remove it. Signed-off-by: Kuniyuki Iwashima Acked-by: Yafang Shao Reviewed-by: Richard Sailer Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index fa6a87246a7b..4ad47d9f9d27 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -304,7 +304,7 @@ static inline __poll_t inet_csk_listen_poll(const struct sock *sk) (EPOLLIN | EPOLLRDNORM) : 0; } -int inet_csk_listen_start(struct sock *sk, int backlog); +int inet_csk_listen_start(struct sock *sk); void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); -- cgit v1.2.3 From 86c82c8aeebf6db5df8ab73cec8333853c405070 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Wed, 24 Nov 2021 11:23:55 +0200 Subject: Revert "drm/i915/dg2: Tile 4 plane format support" Tile4 patch still needs an ack from userspace, IGT tests and some essential fixes, related to new .plane_caps attribute being added. This reverts commit 3c542cfa8266e3364938d055b3d548b7bed7f08e. Signed-off-by: Stanislav Lisovskiy Acked-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20211124092355.16668-1-stanislav.lisovskiy@intel.com --- include/uapi/drm/drm_fourcc.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 41184a94935d..7f652c96845b 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -564,14 +564,6 @@ extern "C" { * pitch is required to be a multiple of 4 tile widths. */ #define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8) -/* - * Intel F-tiling(aka Tile4) layout - * - * This is a tiled layout using 4Kb tiles in row-major layout. - * Within the tile pixels are laid out in 64 byte units / sub-tiles in OWORD - * (16 bytes) chunks column-major.. - */ -#define I915_FORMAT_MOD_4_TILED fourcc_mod_code(INTEL, 9) /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks -- cgit v1.2.3 From 393c3714081a53795bbff0e985d24146def6f57f Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 18 Nov 2021 15:00:08 -0800 Subject: kernfs: switch global kernfs_rwsem lock to per-fs lock The kernfs implementation has big lock granularity(kernfs_rwsem) so every kernfs-based(e.g., sysfs, cgroup) fs are able to compete the lock. It makes trouble for some cases to wait the global lock for a long time even though they are totally independent contexts each other. A general example is process A goes under direct reclaim with holding the lock when it accessed the file in sysfs and process B is waiting the lock with exclusive mode and then process C is waiting the lock until process B could finish the job after it gets the lock from process A. This patch switches the global kernfs_rwsem to per-fs lock, which put the rwsem into kernfs_root. Suggested-by: Tejun Heo Acked-by: Tejun Heo Signed-off-by: Minchan Kim Link: https://lore.kernel.org/r/20211118230008.2679780-1-minchan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 3ccce6f24548..9f650986a81b 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -16,6 +16,7 @@ #include #include #include +#include struct file; struct dentry; @@ -197,6 +198,7 @@ struct kernfs_root { struct list_head supers; wait_queue_head_t deactivate_waitq; + struct rw_semaphore kernfs_rwsem; }; struct kernfs_open_file { -- cgit v1.2.3 From b456abe63f60ad93c83a526d33b71574bc32656c Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 19 Nov 2021 17:08:50 -0600 Subject: ALSA: pcm: introduce INFO_NO_REWINDS flag When the hardware can only deal with a monotonically increasing appl_ptr, this flag can be set. In case the application requests a rewind, be it with a snd_pcm_rewind() or with a direct change of a mmap'ed pointer followed by a SNDRV_PCM_IOCTL_SYNC_PTR, this patch checks if a rewind occurred and returns an error. Credits to Takashi Iwai for identifying the path with SYNC_PTR and suggesting the pointer checks. Suggested-by: Takashi Iwai Reviewed-by: Takashi Iwai Reviewed-by: Ranjani Sridharan Reviewed-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211119230852.206310-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/uapi/sound/asound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 5fbb79e30819..ff7e638221c5 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -300,7 +300,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ #define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ - +#define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */ #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ -- cgit v1.2.3 From 1b6ed6bf32fb22ef8e3572fc9c0f6454adf1ca40 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 24 Nov 2021 09:17:37 +0200 Subject: regulator: Drop unnecessary struct member The irq_flags from the regulator IRQ helper description struct was never used. The IRQ flags are passed as parameters to helper registration instead. Remove the unnecessary struct field. Fixes: 7111c6d1b31b ("regulator: IRQ based event/error notification helpers") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/5f6371e178453fa2b165da50452f7db4e986debb.1637736436.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 1cb8071fee34..53b25cd7ead0 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -554,7 +554,6 @@ struct regulator_irq_data { */ struct regulator_irq_desc { const char *name; - int irq_flags; int fatal_cnt; int reread_ms; int irq_off_ms; -- cgit v1.2.3 From 6fadec4c5561e2fbe1dfa8a7da9bc58d094a8f04 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 24 Nov 2021 09:16:45 +0200 Subject: regulator: Add regulator_err2notif() helper Help drivers avoid storing both supported notification and supported error flags by supporting conversion from regulator error to notification. This may help saving some bytes. Add helper for finding the regulator notification corresponding to a regulator error. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/eb1755ac0569ff07ffa466cf8912c6fd50e7c7c6.1637736436.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 53b25cd7ead0..6c6ec9658c30 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -645,6 +645,40 @@ struct regulator_dev { spinlock_t err_lock; }; +/* + * Convert error flags to corresponding notifications. + * + * Can be used by drivers which use the notification helpers to + * find out correct notification flags based on the error flags. Drivers + * can avoid storing both supported notification and error flags which + * may save few bytes. + */ +static inline int regulator_err2notif(int err) +{ + switch (err) { + case REGULATOR_ERROR_UNDER_VOLTAGE: + return REGULATOR_EVENT_UNDER_VOLTAGE; + case REGULATOR_ERROR_OVER_CURRENT: + return REGULATOR_EVENT_OVER_CURRENT; + case REGULATOR_ERROR_REGULATION_OUT: + return REGULATOR_EVENT_REGULATION_OUT; + case REGULATOR_ERROR_FAIL: + return REGULATOR_EVENT_FAIL; + case REGULATOR_ERROR_OVER_TEMP: + return REGULATOR_EVENT_OVER_TEMP; + case REGULATOR_ERROR_UNDER_VOLTAGE_WARN: + return REGULATOR_EVENT_UNDER_VOLTAGE_WARN; + case REGULATOR_ERROR_OVER_CURRENT_WARN: + return REGULATOR_EVENT_OVER_CURRENT_WARN; + case REGULATOR_ERROR_OVER_VOLTAGE_WARN: + return REGULATOR_EVENT_OVER_VOLTAGE_WARN; + case REGULATOR_ERROR_OVER_TEMP_WARN: + return REGULATOR_EVENT_OVER_TEMP_WARN; + } + return 0; +} + + struct regulator_dev * regulator_register(const struct regulator_desc *regulator_desc, const struct regulator_config *config); -- cgit v1.2.3 From a764ff77d697a4a13e69b3379cc613f7409c6b9a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 24 Nov 2021 09:17:13 +0200 Subject: regulator: irq_helper: Provide helper for trivial IRQ notifications Provide a generic map_event helper for regulators which have a notification IRQ with single, well defined purpose. Eg, IRQ always indicates exactly one event for exactly one regulator device. For such IRQs the mapping is trivial. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/603b7ed1938013a00371c1e7ccc63dfb16982b87.1637736436.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 6c6ec9658c30..4078c7776453 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -700,6 +700,8 @@ void *regulator_irq_helper(struct device *dev, int irq_flags, int common_errs, int *per_rdev_errs, struct regulator_dev **rdev, int rdev_amount); void regulator_irq_helper_cancel(void **handle); +int regulator_irq_map_event_simple(int irq, struct regulator_irq_data *rid, + unsigned long *dev_mask); void *rdev_get_drvdata(struct regulator_dev *rdev); struct device *rdev_get_dev(struct regulator_dev *rdev); -- cgit v1.2.3 From 432dd1fc134ef902b049b26839edfd3fdc1f8dc0 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 24 Nov 2021 07:57:49 +0200 Subject: regulator: rohm-generic: remove unused dummies Function rohm_regulator_set_voltage_sel_restricted() and rohm_regulator_set_dvs_levels() had inlined dummy implementations for cases when the real implementation was not configured in. All of the drivers who issue the call to these functions do SELECT the real implementation from the Kconfig. There should be no cases where the real implementation was not selected by the drivers using these functions - such a situation is likely to be an error which deserves to be noticed at compile-time. These dummies could in theory be used for compile-testing the drivers only (without the generic rohm regulator pieces). However, for such compile testing we should manually drop the selection from KConfig - and I guess that if it does not work out-of-the-box, then it is not going to happen. Especially when there should be no reason to omit compile-testing the generic rohm_regulator part. Crash test dummies. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/YZ3UXXrk/Efe7Scj@fedora Signed-off-by: Mark Brown --- include/linux/mfd/rohm-generic.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 080d60adcd5f..5ed97a1d0908 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -82,20 +82,6 @@ int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs, int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev, unsigned int sel); -#else -static inline int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs, - struct device_node *np, - const struct regulator_desc *desc, - struct regmap *regmap) -{ - return 0; -} - -static inline int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev, - unsigned int sel) -{ - return 0; -} #endif #endif -- cgit v1.2.3 From 2338e7bcef445059a99848a3eddde0b556277663 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 15 Nov 2021 15:10:01 +0300 Subject: device property: Remove device_add_properties() API There are no more users for it. Reviewed-by: Andy Shevchenko Signed-off-by: Heikki Krogerus Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 88fa726a76df..16f736c698a2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -378,10 +378,6 @@ property_entries_dup(const struct property_entry *properties); void property_entries_free(const struct property_entry *properties); -int device_add_properties(struct device *dev, - const struct property_entry *properties); -void device_remove_properties(struct device *dev); - bool device_dma_supported(struct device *dev); enum dev_dma_attr device_get_dma_attr(struct device *dev); -- cgit v1.2.3 From 6fd13452c1a2e6dfe5b9a4c84c1144383cc55472 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Nov 2021 13:16:40 +0200 Subject: ACPI: processor: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_numa.h | 1 - include/acpi/processor.h | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h index 68e4d80c1b32..b5f594754a9e 100644 --- a/include/acpi/acpi_numa.h +++ b/include/acpi/acpi_numa.h @@ -3,7 +3,6 @@ #define __ACPI_NUMA_H #ifdef CONFIG_ACPI_NUMA -#include #include /* Proximity bitmap length */ diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 683e124ad517..194027371928 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -2,11 +2,16 @@ #ifndef __ACPI_PROCESSOR_H #define __ACPI_PROCESSOR_H -#include #include #include #include +#include +#include +#include #include +#include +#include + #include #define ACPI_PROCESSOR_CLASS "processor" -- cgit v1.2.3 From 04c76b41ca974b508522831441dd7e5b1b59cbb0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 10 Nov 2021 15:49:32 +0000 Subject: io_uring: add option to skip CQE posting Emitting a CQE is expensive from the kernel perspective. Often, it's also not convenient for the userspace, spends some cycles on processing and just complicates the logic. A similar problems goes for linked requests, where we post an CQE for each request in the link. Introduce a new flags, IOSQE_CQE_SKIP_SUCCESS, trying to help with it. When set and a request completed successfully, it won't generate a CQE. When fails, it produces an CQE, but all following linked requests will be CQE-less, regardless whether they have IOSQE_CQE_SKIP_SUCCESS or not. The notion of "fail" is the same as for link failing-cancellation, where it's opcode dependent, and _usually_ result >= 0 is a success, but not always. Linked timeouts are a bit special. When the requests it's linked to was not attempted to be executed, e.g. failing linked requests, it follows the description above. Otherwise, whether a linked timeout will post a completion or not solely depends on IOSQE_CQE_SKIP_SUCCESS of that linked timeout request. Linked timeout never "fail" during execution, so for them it's unconditional. It's expected for users to not really care about the result of it but rely solely on the result of the master request. Another reason for such a treatment is that it's racy, and the timeout callback may be running awhile the master request posts its completion. use case 1: If one doesn't care about results of some requests, e.g. normal timeouts, just set IOSQE_CQE_SKIP_SUCCESS. Error result will still be posted and need to be handled. use case 2: Set IOSQE_CQE_SKIP_SUCCESS for all requests of a link but the last, and it'll post a completion only for the last one if everything goes right, otherwise there will be one only one CQE for the first failed request. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0220fbe06f7cf99e6fc71b4297bb1cb6c0e89c2c.1636559119.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index c45b5e9a9387..787f491f0d2a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -70,6 +70,7 @@ enum { IOSQE_IO_HARDLINK_BIT, IOSQE_ASYNC_BIT, IOSQE_BUFFER_SELECT_BIT, + IOSQE_CQE_SKIP_SUCCESS_BIT, }; /* @@ -87,6 +88,8 @@ enum { #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) /* select buffer from sqe->buf_group */ #define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT) +/* don't post CQE if request succeeded */ +#define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT) /* * io_uring_setup() flags @@ -289,6 +292,7 @@ struct io_uring_params { #define IORING_FEAT_EXT_ARG (1U << 8) #define IORING_FEAT_NATIVE_WORKERS (1U << 9) #define IORING_FEAT_RSRC_TAGS (1U << 10) +#define IORING_FEAT_CQE_SKIP (1U << 11) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3 From 3f2bedabb62c6210df63b604dc988d2f7f56f947 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Oct 2021 12:03:47 +0200 Subject: futex: Ensure futex_atomic_cmpxchg_inatomic() is present The boot-time detection of futex_atomic_cmpxchg_inatomic() has a bug on some 32-bit arm builds, and Thomas Gleixner suggested that setting CONFIG_HAVE_FUTEX_CMPXCHG would avoid the problem, as it is always present anyway. Looking into which other architectures could do the same showed that almost all architectures have it, the exceptions being: - some old 32-bit MIPS uniprocessor cores without ll/sc - one xtensa variant with no SMP - 32-bit SPARC when built for SMP Fix MIPS And Xtensa by rearranging the generic code to let it be used as a fallback. For SPARC, the SMP definition just ends up turning off futex anyway, so this can be done at Kconfig time instead. Note that sparc32 glibc requires the CASA instruction for its mutexes anyway, which is only available when running on SPARCv9 or LEON CPUs, but needs to be implemented in the sparc32 kernel for those. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Acked-by: Max Filippov Acked-by: Geert Uytterhoeven Acked-by: Rich Felker Link: https://lore.kernel.org/r/20211026100432.1730393-1-arnd@kernel.org --- include/asm-generic/futex.h | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index f4c3470480c7..30e7fa63b5df 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -6,15 +6,22 @@ #include #include +#ifndef futex_atomic_cmpxchg_inatomic #ifndef CONFIG_SMP /* * The following implementation only for uniprocessor machines. * It relies on preempt_disable() ensuring mutual exclusion. * */ +#define futex_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval) \ + futex_atomic_cmpxchg_inatomic_local_generic(uval, uaddr, oldval, newval) +#define arch_futex_atomic_op_inuser(op, oparg, oval, uaddr) \ + arch_futex_atomic_op_inuser_local_generic(op, oparg, oval, uaddr) +#endif /* CONFIG_SMP */ +#endif /** - * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant + * arch_futex_atomic_op_inuser_local() - Atomic arithmetic operation with constant * argument and comparison of the previous * futex value with another constant. * @@ -28,7 +35,7 @@ * -ENOSYS - Operation not supported */ static inline int -arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) +futex_atomic_op_inuser_local(int op, u32 oparg, int *oval, u32 __user *uaddr) { int oldval, ret; u32 tmp; @@ -75,7 +82,7 @@ out_pagefault_enable: } /** - * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the + * futex_atomic_cmpxchg_inatomic_local() - Compare and exchange the content of the * uaddr with newval if the current value is * oldval. * @uval: pointer to store content of @uaddr @@ -87,10 +94,9 @@ out_pagefault_enable: * 0 - On success * -EFAULT - User access resulted in a page fault * -EAGAIN - Atomic operation was unable to complete due to contention - * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG) */ static inline int -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, +futex_atomic_cmpxchg_inatomic_local(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { u32 val; @@ -112,19 +118,4 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return 0; } -#else -static inline int -arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) -{ - return -ENOSYS; -} - -static inline int -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) -{ - return -ENOSYS; -} - -#endif /* CONFIG_SMP */ #endif -- cgit v1.2.3 From f124034faa911ed534bf8c4881ad98dbbde2a966 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Nov 2021 18:44:17 -0500 Subject: Revert "virtio_ring: validate used buffer length" This reverts commit 939779f5152d161b34f612af29e7dc1ac4472fcf. Attempts to validate length in the core did not work out: there turn out to exist multiple broken devices, and in particular legacy devices are known to be broken in this respect. We have ideas for handling this better in the next version but for now let's revert to a known good state to make sure drivers work for people. Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 44d0e09da2d9..41edbc01ffa4 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -152,7 +152,6 @@ size_t virtio_max_dma_size(struct virtio_device *vdev); * @feature_table_size: number of entries in the feature table array. * @feature_table_legacy: same as feature_table but when working in legacy mode. * @feature_table_size_legacy: number of entries in feature table legacy array. - * @suppress_used_validation: set to not have core validate used length * @probe: the function to call when a device is found. Returns 0 or -errno. * @scan: optional function to call after successful probe; intended * for virtio-scsi to invoke a scan. @@ -169,7 +168,6 @@ struct virtio_driver { unsigned int feature_table_size; const unsigned int *feature_table_legacy; unsigned int feature_table_size_legacy; - bool suppress_used_validation; int (*validate)(struct virtio_device *dev); int (*probe)(struct virtio_device *dev); void (*scan)(struct virtio_device *dev); -- cgit v1.2.3 From 083a7fba38885a8ffa03a2857e383421cefd36e6 Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Thu, 25 Nov 2021 13:58:11 +0800 Subject: ASoC: rt5640: Add the binding include file for the HDA header support The patch adds the binding include file for the HDA header support. Signed-off-by: Oder Chiou Link: https://lore.kernel.org/r/20211125055812.8911-1-oder_chiou@realtek.com Signed-off-by: Mark Brown --- include/dt-bindings/sound/rt5640.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/sound/rt5640.h b/include/dt-bindings/sound/rt5640.h index 154c9b4414f2..655f6946388a 100644 --- a/include/dt-bindings/sound/rt5640.h +++ b/include/dt-bindings/sound/rt5640.h @@ -16,6 +16,7 @@ #define RT5640_JD_SRC_GPIO2 4 #define RT5640_JD_SRC_GPIO3 5 #define RT5640_JD_SRC_GPIO4 6 +#define RT5640_JD_SRC_HDA_HEADER 7 #define RT5640_OVCD_SF_0P5 0 #define RT5640_OVCD_SF_0P75 1 -- cgit v1.2.3 From 53db28933e952a8536b002ba8b8c9443ccc0e939 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 25 Nov 2021 14:05:18 +0100 Subject: fuse: extend init flags FUSE_INIT flags are close to running out, so add another 32bits worth of space. Add FUSE_INIT_EXT flag to the old flags field in fuse_init_in. If this flag is set, then fuse_init_in is extended by 48bytes, in which a flags_hi field is allocated to contain the high 32bits of the flags. A flags_hi field is also added to fuse_init_out, allocated out of the remaining unused fields. Known userspace implementations of the fuse protocol have been checked to accept the extended FUSE_INIT request, but this might cause problems with other implementations. If that happens to be the case, the protocol negotiation will have to be extended with an extra initialization request roundtrip. Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index a1dc3ee1d17c..980f3998c11b 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -187,6 +187,10 @@ * * 7.35 * - add FOPEN_NOFLUSH + * + * 7.36 + * - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag + * - add flags2 to fuse_init_in and fuse_init_out */ #ifndef _LINUX_FUSE_H @@ -222,7 +226,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 35 +#define FUSE_KERNEL_MINOR_VERSION 36 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -341,6 +345,8 @@ struct fuse_file_lock { * write/truncate sgid is killed only if file has group * execute permission. (Same as Linux VFS behavior). * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in + * FUSE_INIT_EXT: extended fuse_init_in request + * FUSE_INIT_RESERVED: reserved, do not use */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -372,6 +378,9 @@ struct fuse_file_lock { #define FUSE_SUBMOUNTS (1 << 27) #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) #define FUSE_SETXATTR_EXT (1 << 29) +#define FUSE_INIT_EXT (1 << 30) +#define FUSE_INIT_RESERVED (1 << 31) +/* bits 32..63 get shifted down 32 bits into the flags2 field */ /** * CUSE INIT request/reply flags @@ -741,6 +750,8 @@ struct fuse_init_in { uint32_t minor; uint32_t max_readahead; uint32_t flags; + uint32_t flags2; + uint32_t unused[11]; }; #define FUSE_COMPAT_INIT_OUT_SIZE 8 @@ -757,7 +768,8 @@ struct fuse_init_out { uint32_t time_gran; uint16_t max_pages; uint16_t map_alignment; - uint32_t unused[8]; + uint32_t flags2; + uint32_t unused[7]; }; #define CUSE_INIT_INFO_MAX 4096 -- cgit v1.2.3 From 3e2b6fdbdc9ab5a02d9d5676a005f30780b97553 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 11 Nov 2021 09:32:49 -0500 Subject: fuse: send security context of inode on file When a new inode is created, send its security context to server along with creation request (FUSE_CREAT, FUSE_MKNOD, FUSE_MKDIR and FUSE_SYMLINK). This gives server an opportunity to create new file and set security context (possibly atomically). In all the configurations it might not be possible to set context atomically. Like nfs and ceph, use security_dentry_init_security() to dermine security context of inode and send it with create, mkdir, mknod, and symlink requests. Following is the information sent to server. fuse_sectx_header, fuse_secctx, xattr_name, security_context - struct fuse_secctx_header This contains total number of security contexts being sent and total size of all the security contexts (including size of fuse_secctx_header). - struct fuse_secctx This contains size of security context which follows this structure. There is one fuse_secctx instance per security context. - xattr name string This string represents name of xattr which should be used while setting security context. - security context This is the actual security context whose size is specified in fuse_secctx struct. Also add the FUSE_SECURITY_CTX flag for the `flags` field of the fuse_init_out struct. When this flag is set the kernel will append the security context for a newly created inode to the request (create, mkdir, mknod, and symlink). The server is responsible for ensuring that the inode appears atomically (preferrably) with the requested security context. For example, If the server is using SELinux and backed by a "real" linux file system that supports extended attributes it can write the security context value to /proc/thread-self/attr/fscreate before making the syscall to create the inode. This patch is based on patch from Chirantan Ekbote Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 980f3998c11b..3f0ea63fec08 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -191,6 +191,8 @@ * 7.36 * - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag * - add flags2 to fuse_init_in and fuse_init_out + * - add FUSE_SECURITY_CTX init flag + * - add security context to create, mkdir, symlink, and mknod requests */ #ifndef _LINUX_FUSE_H @@ -347,6 +349,8 @@ struct fuse_file_lock { * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in * FUSE_INIT_EXT: extended fuse_init_in request * FUSE_INIT_RESERVED: reserved, do not use + * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and + * mknod */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -381,6 +385,7 @@ struct fuse_file_lock { #define FUSE_INIT_EXT (1 << 30) #define FUSE_INIT_RESERVED (1 << 31) /* bits 32..63 get shifted down 32 bits into the flags2 field */ +#define FUSE_SECURITY_CTX (1ULL << 32) /** * CUSE INIT request/reply flags @@ -877,9 +882,12 @@ struct fuse_dirent { char name[]; }; -#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) -#define FUSE_DIRENT_ALIGN(x) \ +/* Align variable length records to 64bit boundary */ +#define FUSE_REC_ALIGN(x) \ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) + +#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +#define FUSE_DIRENT_ALIGN(x) FUSE_REC_ALIGN(x) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) @@ -996,4 +1004,26 @@ struct fuse_syncfs_in { uint64_t padding; }; +/* + * For each security context, send fuse_secctx with size of security context + * fuse_secctx will be followed by security context name and this in turn + * will be followed by actual context label. + * fuse_secctx, name, context + */ +struct fuse_secctx { + uint32_t size; + uint32_t padding; +}; + +/* + * Contains the information about how many fuse_secctx structures are being + * sent and what's the total size of all security contexts (including + * size of fuse_secctx_header). + * + */ +struct fuse_secctx_header { + uint32_t size; + uint32_t nr_secctx; +}; + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From 635e4172bd0a43af943fb164799965fc9a9a705d Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 2 Nov 2021 07:38:10 +0100 Subject: arm: remove zte zx platform left-over Commit 89d4f98ae90d ("ARM: remove zte zx platform") missed to remove some definitions for this platform's debug and serial, e.g., code dependent on the config DEBUG_ZTE_ZX. Fortunately, ./scripts/checkkconfigsymbols.py detects this and warns: DEBUG_ZTE_ZX Referencing files: arch/arm/include/debug/pl01x.S Further review by Arnd Bergmann identified even more dead code in the amba serial driver. Remove all this left-over from the zte zx platform. Reviewed-by: Arnd Bergmann Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20211102063810.932-1-lukas.bulwahn@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/amba/bus.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index edfcf7a14dcd..6c7f47846971 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -90,14 +90,8 @@ enum amba_vendor { AMBA_VENDOR_ST = 0x80, AMBA_VENDOR_QCOM = 0x51, AMBA_VENDOR_LSI = 0xb6, - AMBA_VENDOR_LINUX = 0xfe, /* This value is not official */ }; -/* This is used to generate pseudo-ID for AMBA device */ -#define AMBA_LINUX_ID(conf, rev, part) \ - (((conf) & 0xff) << 24 | ((rev) & 0xf) << 20 | \ - AMBA_VENDOR_LINUX << 12 | ((part) & 0xfff)) - extern struct bus_type amba_bustype; #define to_amba_device(d) container_of(d, struct amba_device, dev) -- cgit v1.2.3 From 4167bd25ec3bc221387ec6811c05eadfe3cf1d3e Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 18 Nov 2021 08:31:24 +0100 Subject: mxser: move ids from pci_ids.h here There is no point having MOXA PCI device IDs in include/linux/pci_ids.h. Move them to the driver and sort them all by the ID. Cc: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Acked-by: Bjorn Helgaas Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211118073125.12283-19-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 011f2f1ea5bb..c389a9c0f290 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1964,24 +1964,6 @@ #define PCI_DEVICE_ID_APPLICOM_PCI2000PFB 0x0003 #define PCI_VENDOR_ID_MOXA 0x1393 -#define PCI_DEVICE_ID_MOXA_RC7000 0x0001 -#define PCI_DEVICE_ID_MOXA_CP102 0x1020 -#define PCI_DEVICE_ID_MOXA_CP102UL 0x1021 -#define PCI_DEVICE_ID_MOXA_CP102U 0x1022 -#define PCI_DEVICE_ID_MOXA_C104 0x1040 -#define PCI_DEVICE_ID_MOXA_CP104U 0x1041 -#define PCI_DEVICE_ID_MOXA_CP104JU 0x1042 -#define PCI_DEVICE_ID_MOXA_CP104EL 0x1043 -#define PCI_DEVICE_ID_MOXA_CT114 0x1140 -#define PCI_DEVICE_ID_MOXA_CP114 0x1141 -#define PCI_DEVICE_ID_MOXA_CP118U 0x1180 -#define PCI_DEVICE_ID_MOXA_CP118EL 0x1181 -#define PCI_DEVICE_ID_MOXA_CP132 0x1320 -#define PCI_DEVICE_ID_MOXA_CP132U 0x1321 -#define PCI_DEVICE_ID_MOXA_CP134U 0x1340 -#define PCI_DEVICE_ID_MOXA_C168 0x1680 -#define PCI_DEVICE_ID_MOXA_CP168U 0x1681 -#define PCI_DEVICE_ID_MOXA_CP168EL 0x1682 #define PCI_DEVICE_ID_MOXA_CP204J 0x2040 #define PCI_DEVICE_ID_MOXA_C218 0x2180 #define PCI_DEVICE_ID_MOXA_C320 0x3200 -- cgit v1.2.3 From 5db96ef23bda6c2a61a51693c85b78b52d03f654 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 22 Nov 2021 12:16:48 +0100 Subject: tty: drop tty_schedule_flip() Since commit a9c3f68f3cd8d (tty: Fix low_latency BUG) in 2014, tty_flip_buffer_push() is only a wrapper to tty_schedule_flip(). All users were converted in the previous patches, so remove tty_schedule_flip() completely while inlining its body into tty_flip_buffer_push(). One less exported function. Reviewed-by: Johan Hovold Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211122111648.30379-4-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_flip.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index 9916acb5de49..483d41cbcbb7 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -17,7 +17,6 @@ int tty_insert_flip_string_fixed_flag(struct tty_port *port, int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, size_t size); void tty_flip_buffer_push(struct tty_port *port); -void tty_schedule_flip(struct tty_port *port); int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); static inline int tty_insert_flip_char(struct tty_port *port, -- cgit v1.2.3 From d78328bcc4d0e677f2ff83f4ae1f43c933fbd143 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 22 Nov 2021 10:45:29 +0100 Subject: tty: remove file from tty_ldisc_ops::ioctl and compat_ioctl After the previous patches, noone needs 'file' parameter in neither ioctl hook from tty_ldisc_ops. So remove 'file' from both of them. Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Luiz Augusto von Dentz Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Andreas Koensgen Cc: Paul Mackerras Acked-by: Krzysztof Kozlowski [NFC] Acked-by: Dmitry Torokhov Acked-by: Marc Kleine-Budde Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211122094529.24171-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_ldisc.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index b85d84fb5f49..25f07017bbad 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -45,8 +45,7 @@ struct tty_struct; * some processing on the characters first. If this function is * not defined, the user will receive an EIO error. * - * int (*ioctl)(struct tty_struct * tty, struct file * file, - * unsigned int cmd, unsigned long arg); + * int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); * * This function is called when the user requests an ioctl which * is not handled by the tty layer or the low-level tty driver. @@ -56,8 +55,8 @@ struct tty_struct; * low-level driver can "grab" an ioctl request before the line * discpline has a chance to see it. * - * int (*compat_ioctl)(struct tty_struct * tty, struct file * file, - * unsigned int cmd, unsigned long arg); + * int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, + * unsigned long arg); * * Process ioctl calls from 32-bit process on 64-bit system * @@ -192,10 +191,10 @@ struct tty_ldisc_ops { void **cookie, unsigned long offset); ssize_t (*write)(struct tty_struct *tty, struct file *file, const unsigned char *buf, size_t nr); - int (*ioctl)(struct tty_struct *tty, struct file *file, - unsigned int cmd, unsigned long arg); - int (*compat_ioctl)(struct tty_struct *tty, struct file *file, - unsigned int cmd, unsigned long arg); + int (*ioctl)(struct tty_struct *tty, unsigned int cmd, + unsigned long arg); + int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, + unsigned long arg); void (*set_termios)(struct tty_struct *tty, struct ktermios *old); __poll_t (*poll)(struct tty_struct *, struct file *, struct poll_table_struct *); -- cgit v1.2.3 From e88422bccda86808359f0d4179e25e5c2191ab4f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 24 Nov 2021 16:16:12 -0800 Subject: Bluetooth: HCI: Fix definition of hci_rp_read_stored_link_key Both max_num_keys and num_key are 2 octects: BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1985: Max_Num_Keys: Size: 2 octets Range: 0x0000 to 0xFFFF Num_Keys_Read: Size: 2 octets Range: 0x0000 to 0xFFFF Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 4 ++-- include/net/bluetooth/hci_core.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 84db6b275231..923534da9c0f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1047,8 +1047,8 @@ struct hci_cp_read_stored_link_key { } __packed; struct hci_rp_read_stored_link_key { __u8 status; - __u8 max_keys; - __u8 num_keys; + __le16 max_keys; + __le16 num_keys; } __packed; #define HCI_OP_DELETE_STORED_LINK_KEY 0x0c12 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2560cfe80db8..bb07a6d0d597 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -352,8 +352,8 @@ struct hci_dev { __u16 lmp_subver; __u16 voice_setting; __u8 num_iac; - __u8 stored_max_keys; - __u8 stored_num_keys; + __u16 stored_max_keys; + __u16 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; __u8 err_data_reporting; -- cgit v1.2.3 From 7978656caf2ad3963fb63797128ec8d71caa2798 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 24 Nov 2021 16:16:13 -0800 Subject: Bluetooth: HCI: Fix definition of hci_rp_delete_stored_link_key num_keys is actually 2 octects not 1: BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1989: Num_Keys_Deleted: Size: 2 octets 0xXXXX Number of Link Keys Deleted Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 923534da9c0f..0d2a9216869b 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1058,7 +1058,7 @@ struct hci_cp_delete_stored_link_key { } __packed; struct hci_rp_delete_stored_link_key { __u8 status; - __u8 num_keys; + __le16 num_keys; } __packed; #define HCI_MAX_NAME_LENGTH 248 -- cgit v1.2.3 From ea13aed5e5dfbabd166759aaf9f4af3cb804875a Mon Sep 17 00:00:00 2001 From: Archie Pusaka Date: Thu, 25 Nov 2021 15:04:36 +0800 Subject: Bluetooth: Send device found event on name resolve failure Introducing NAME_REQUEST_FAILED flag that will be sent together with device found event on name resolve failure. This will provide the userspace with an information so it can decide not to resolve the name for these devices in the future. Signed-off-by: Archie Pusaka Reviewed-by: Miao-chen Chou Signed-off-by: Marcel Holtmann --- include/net/bluetooth/mgmt.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 23a0524061b7..107b25deae68 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -936,10 +936,11 @@ struct mgmt_ev_auth_failed { __u8 status; } __packed; -#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01 -#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02 -#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04 -#define MGMT_DEV_FOUND_INITIATED_CONN 0x08 +#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01 +#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02 +#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04 +#define MGMT_DEV_FOUND_INITIATED_CONN 0x08 +#define MGMT_DEV_FOUND_NAME_REQUEST_FAILED 0x10 #define MGMT_EV_DEVICE_FOUND 0x0012 struct mgmt_ev_device_found { -- cgit v1.2.3 From dbf6811abbfcc79d3cd5ce1ff53fe1c741167a1f Mon Sep 17 00:00:00 2001 From: Archie Pusaka Date: Thu, 25 Nov 2021 15:04:37 +0800 Subject: Bluetooth: Limit duration of Remote Name Resolve When doing remote name request, we cannot scan. In the normal case it's OK since we can expect it to finish within a short amount of time. However, there is a possibility to scan lots of devices that (1) requires Remote Name Resolve (2) is unresponsive to Remote Name Resolve When this happens, we are stuck to do Remote Name Resolve until all is done before continue scanning. This patch adds a time limit to stop us spending too long on remote name request. Signed-off-by: Archie Pusaka Reviewed-by: Miao-chen Chou Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index bb07a6d0d597..7bae8376fd6f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -88,6 +88,7 @@ struct discovery_state { u8 (*uuids)[16]; unsigned long scan_start; unsigned long scan_duration; + unsigned long name_resolve_timeout; }; #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ @@ -1759,6 +1760,8 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_LE_FAST_ADV_INT_MIN 0x00A0 /* 100 msec */ #define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */ +#define NAME_RESOLVE_DURATION msecs_to_jiffies(10240) /* 10.24 sec */ + void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); -- cgit v1.2.3 From 8abe19703825eda9c49f54624af57546c23af53f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 25 Nov 2021 14:58:08 +0200 Subject: net: dsa: felix: enable cut-through forwarding between ports by default The VSC9959 switch embedded within NXP LS1028A (and that version of Ocelot switches only) supports cut-through forwarding - meaning it can start the process of looking up the destination ports for a packet, and forward towards those ports, before the entire packet has been received (as opposed to the store-and-forward mode). The up side is having lower forwarding latency for large packets. The down side is that frames with FCS errors are forwarded instead of being dropped. However, erroneous frames do not result in incorrect updates of the FDB or incorrect policer updates, since these processes are deferred inside the switch to the end of frame. Since the switch starts the cut-through forwarding process after all packet headers (including IP, if any) have been processed, packets with large headers and small payload do not see the benefit of lower forwarding latency. There are two cases that need special attention. The first is when a packet is multicast (or flooded) to multiple destinations, one of which doesn't have cut-through forwarding enabled. The switch deals with this automatically by disabling cut-through forwarding for the frame towards all destination ports. The second is when a packet is forwarded from a port of lower link speed towards a port of higher link speed. This is not handled by the hardware and needs software intervention. Since we practically need to update the cut-through forwarding domain from paths that aren't serialized by the rtnl_mutex (phylink mac_link_down/mac_link_up ops), this means we need to serialize physical link events with user space updates of bonding/bridging domains. Enabling cut-through forwarding is done per {egress port, traffic class}. I don't see any reason why this would be a configurable option as long as it works without issues, and there doesn't appear to be any user space configuration tool to toggle this on/off, so this patch enables cut-through forwarding on all eligible ports and traffic classes. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211125125808.2383984-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/soc/mscc/ocelot.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 89d17629efe5..33f2e8c9e88b 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -561,6 +561,7 @@ struct ocelot_ops { int (*psfp_filter_del)(struct ocelot *ocelot, struct flow_cls_offload *f); int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f, struct flow_stats *stats); + void (*cut_through_fwd)(struct ocelot *ocelot); }; struct ocelot_vcap_policer { @@ -655,6 +656,8 @@ struct ocelot_port { struct net_device *bridge; u8 stp_state; + + int speed; }; struct ocelot { @@ -712,6 +715,8 @@ struct ocelot { /* Lock for serializing access to the MAC table */ struct mutex mact_lock; + /* Lock for serializing forwarding domain changes */ + struct mutex fwd_domain_lock; struct workqueue_struct *owq; @@ -811,7 +816,9 @@ void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs); int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled, struct netlink_ext_ack *extack); void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state); -void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot); +u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot); +u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port); +void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot, bool joining); int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, -- cgit v1.2.3 From 703319094c9c2bf34f65d3496ccb350149fdd14b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 24 Nov 2021 14:26:14 -0500 Subject: sctp: make the raise timer more simple and accurate Currently, the probe timer is reused as the raise timer when PLPMTUD is in the Search Complete state. raise_count was introduced to count how many times the probe timer has timed out. When raise_count reaches to 30, the raise timer handler will be triggered. During the whole processing above, the timer keeps timing out every probe_ interval. It is a waste for the Search Complete state, as the raise timer only needs to time out after 30 * probe_interval. Since the raise timer and probe timer are never used at the same time, it is no need to keep probe timer 'alive' in the Search Complete state. This patch to introduce sctp_transport_reset_raise_timer() to start the timer as the raise timer when entering the Search Complete state. When entering the other states, sctp_transport_reset_probe_timer() will still be called to reset the timer to the probe timer. raise_count can be removed from sctp_transport as no need to count probe timer timeout for raise timer timeout. last_rtx_chunks can be removed as sctp_transport_reset_probe_timer() can be called in the place where asoc rtx_data_chunks is changed. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/edb0e48988ea85997488478b705b11ddc1ba724a.1637781974.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sctp/structs.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 899c29c326ba..e7d1ed7a3dfb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -984,12 +984,10 @@ struct sctp_transport { } cacc; struct { - __u32 last_rtx_chunks; __u16 pmtu; __u16 probe_size; __u16 probe_high; - __u8 probe_count:3; - __u8 raise_count:5; + __u8 probe_count; __u8 state; } pl; /* plpmtud related */ @@ -1011,6 +1009,7 @@ void sctp_transport_reset_t3_rtx(struct sctp_transport *); void sctp_transport_reset_hb_timer(struct sctp_transport *); void sctp_transport_reset_reconf_timer(struct sctp_transport *transport); void sctp_transport_reset_probe_timer(struct sctp_transport *transport); +void sctp_transport_reset_raise_timer(struct sctp_transport *transport); int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); @@ -1025,7 +1024,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); -bool sctp_transport_pl_send(struct sctp_transport *t); +void sctp_transport_pl_send(struct sctp_transport *t); bool sctp_transport_pl_recv(struct sctp_transport *t); -- cgit v1.2.3 From 0bd28476f6363c7ccc841fe6a0ab0dd1fdb822f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Nov 2021 12:24:45 -0800 Subject: gro: optimize skb_gro_postpull_rcsum() We can leverage third argument to csum_partial(): X = csum_sub(X, csum_partial(start, len, 0)); --> X = csum_add(X, ~csum_partial(start, len, 0)); --> X = ~csum_partial(start, len, ~X); This removes one add/adc pair and its dependency against the carry flag. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/gro.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index 9c22a010369c..b1139fca7c43 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -173,8 +173,8 @@ static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { if (NAPI_GRO_CB(skb)->csum_valid) - NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, - csum_partial(start, len, 0)); + NAPI_GRO_CB(skb)->csum = ~csum_partial(start, len, + ~NAPI_GRO_CB(skb)->csum); } /* GRO checksum functions. These are logical equivalents of the normal -- cgit v1.2.3 From 29c3002644bdd653f6ec6407d25135d0a4f7cefb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Nov 2021 12:24:46 -0800 Subject: net: optimize skb_postpull_rcsum() Remove one pair of add/adc instructions and their dependency against carry flag. We can leverage third argument to csum_partial(): X = csum_block_sub(X, csum_partial(start, len, 0), 0); --> X = csum_block_add(X, ~csum_partial(start, len, 0), 0); --> X = ~csum_partial(start, len, ~X); Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index eba256af64a5..eae4bd3237a4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3485,7 +3485,11 @@ __skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - __skb_postpull_rcsum(skb, start, len, 0); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = ~csum_partial(start, len, ~skb->csum); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) < 0) + skb->ip_summed = CHECKSUM_NONE; } static __always_inline void -- cgit v1.2.3 From ce8ce31b2c5c8b18667784b8c515650c65d57b4e Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 15 Nov 2021 15:18:04 +0100 Subject: crypto: drbg - prepare for more fine-grained tracking of seeding state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are two different randomness sources the DRBGs are getting seeded from, namely the jitterentropy source (if enabled) and get_random_bytes(). At initial DRBG seeding time during boot, the latter might not have collected sufficient entropy for seeding itself yet and thus, the DRBG implementation schedules a reseed work from a random_ready_callback once that has happened. This is particularly important for the !->pr DRBG instances, for which (almost) no further reseeds are getting triggered during their lifetime. Because collecting data from the jitterentropy source is a rather expensive operation, the aforementioned asynchronously scheduled reseed work restricts itself to get_random_bytes() only. That is, it in some sense amends the initial DRBG seed derived from jitterentropy output at full (estimated) entropy with fresh randomness obtained from get_random_bytes() once that has been seeded with sufficient entropy itself. With the advent of rng_is_initialized(), there is no real need for doing the reseed operation from an asynchronously scheduled work anymore and a subsequent patch will make it synchronous by moving it next to related logic already present in drbg_generate(). However, for tracking whether a full reseed including the jitterentropy source is required or a "partial" reseed involving only get_random_bytes() would be sufficient already, the boolean struct drbg_state's ->seeded member must become a tristate value. Prepare for this by introducing the new enum drbg_seed_state and change struct drbg_state's ->seeded member's type from bool to that type. For facilitating review, enum drbg_seed_state is made to only contain two members corresponding to the former ->seeded values of false and true resp. at this point: DRBG_SEED_STATE_UNSEEDED and DRBG_SEED_STATE_FULL. A third one for tracking the intermediate state of "seeded from jitterentropy only" will be introduced with a subsequent patch. There is no change in behaviour at this point. Signed-off-by: Nicolai Stange Reviewed-by: Stephan Müller Signed-off-by: Herbert Xu --- include/crypto/drbg.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index c4165126937e..92a87b23ad2f 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -105,6 +105,11 @@ struct drbg_test_data { struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ }; +enum drbg_seed_state { + DRBG_SEED_STATE_UNSEEDED, + DRBG_SEED_STATE_FULL, +}; + struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ @@ -127,7 +132,7 @@ struct drbg_state { struct crypto_wait ctr_wait; /* CTR mode async wait obj */ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ - bool seeded; /* DRBG fully seeded? */ + enum drbg_seed_state seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ -- cgit v1.2.3 From 2bcd25443868aa8863779a6ebc6c9319633025d2 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 15 Nov 2021 15:18:05 +0100 Subject: crypto: drbg - track whether DRBG was seeded with !rng_is_initialized() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the DRBG implementation schedules asynchronous works from random_ready_callbacks for reseeding the DRBG instances with output from get_random_bytes() once the latter has sufficient entropy available. However, as the get_random_bytes() initialization state can get queried by means of rng_is_initialized() now, there is no real need for this asynchronous reseeding logic anymore and it's better to keep things simple by doing it synchronously when needed instead, i.e. from drbg_generate() once rng_is_initialized() has flipped to true. Of course, for this to work, drbg_generate() would need some means by which it can tell whether or not rng_is_initialized() has flipped to true since the last seeding from get_random_bytes(). Or equivalently, whether or not the last seed from get_random_bytes() has happened when rng_is_initialized() was still evaluating to false. As it currently stands, enum drbg_seed_state allows for the representation of two different DRBG seeding states: DRBG_SEED_STATE_UNSEEDED and DRBG_SEED_STATE_FULL. The former makes drbg_generate() to invoke a full reseeding operation involving both, the rather expensive jitterentropy as well as the get_random_bytes() randomness sources. The DRBG_SEED_STATE_FULL state on the other hand implies that no reseeding at all is required for a !->pr DRBG variant. Introduce the new DRBG_SEED_STATE_PARTIAL state to enum drbg_seed_state for representing the condition that a DRBG was being seeded when rng_is_initialized() had still been false. In particular, this new state implies that - the given DRBG instance has been fully seeded from the jitterentropy source (if enabled) - and drbg_generate() is supposed to reseed from get_random_bytes() *only* once rng_is_initialized() turns to true. Up to now, the __drbg_seed() helper used to set the given DRBG instance's ->seeded state to constant DRBG_SEED_STATE_FULL. Introduce a new argument allowing for the specification of the to be written ->seeded value instead. Make the first of its two callers, drbg_seed(), determine the appropriate value based on rng_is_initialized(). The remaining caller, drbg_async_seed(), is known to get invoked only once rng_is_initialized() is true, hence let it pass constant DRBG_SEED_STATE_FULL for the new argument to __drbg_seed(). There is no change in behaviour, except for that the pr_devel() in drbg_generate() would now report "unseeded" for ->pr DRBG instances which had last been seeded when rng_is_initialized() was still evaluating to false. Signed-off-by: Nicolai Stange Reviewed-by: Stephan Müller Signed-off-by: Herbert Xu --- include/crypto/drbg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 92a87b23ad2f..3ebdb1effe74 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -107,6 +107,7 @@ struct drbg_test_data { enum drbg_seed_state { DRBG_SEED_STATE_UNSEEDED, + DRBG_SEED_STATE_PARTIAL, /* Seeded with !rng_is_initialized() */ DRBG_SEED_STATE_FULL, }; -- cgit v1.2.3 From 074bcd4000e0d812bc253f86fedc40f81ed59ccc Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 15 Nov 2021 15:18:07 +0100 Subject: crypto: drbg - make reseeding from get_random_bytes() synchronous get_random_bytes() usually hasn't full entropy available by the time DRBG instances are first getting seeded from it during boot. Thus, the DRBG implementation registers random_ready_callbacks which would in turn schedule some work for reseeding the DRBGs once get_random_bytes() has sufficient entropy available. For reference, the relevant history around handling DRBG (re)seeding in the context of a not yet fully seeded get_random_bytes() is: commit 16b369a91d0d ("random: Blocking API for accessing nonblocking_pool") commit 4c7879907edd ("crypto: drbg - add async seeding operation") commit 205a525c3342 ("random: Add callback API for random pool readiness") commit 57225e679788 ("crypto: drbg - Use callback API for random readiness") commit c2719503f5e1 ("random: Remove kernel blocking API") However, some time later, the initialization state of get_random_bytes() has been made queryable via rng_is_initialized() introduced with commit 9a47249d444d ("random: Make crng state queryable"). This primitive now allows for streamlining the DRBG reseeding from get_random_bytes() by replacing that aforementioned asynchronous work scheduling from random_ready_callbacks with some simpler, synchronous code in drbg_generate() next to the related logic already present therein. Apart from improving overall code readability, this change will also enable DRBG users to rely on wait_for_random_bytes() for ensuring that the initial seeding has completed, if desired. The previous patches already laid the grounds by making drbg_seed() to record at each DRBG instance whether it was being seeded at a time when rng_is_initialized() still had been false as indicated by ->seeded == DRBG_SEED_STATE_PARTIAL. All that remains to be done now is to make drbg_generate() check for this condition, determine whether rng_is_initialized() has flipped to true in the meanwhile and invoke a reseed from get_random_bytes() if so. Make this move: - rename the former drbg_async_seed() work handler, i.e. the one in charge of reseeding a DRBG instance from get_random_bytes(), to "drbg_seed_from_random()", - change its signature as appropriate, i.e. make it take a struct drbg_state rather than a work_struct and change its return type from "void" to "int" in order to allow for passing error information from e.g. its __drbg_seed() invocation onwards to callers, - make drbg_generate() invoke this drbg_seed_from_random() once it encounters a DRBG instance with ->seeded == DRBG_SEED_STATE_PARTIAL by the time rng_is_initialized() has flipped to true and - prune everything related to the former, random_ready_callback based mechanism. As drbg_seed_from_random() is now getting invoked from drbg_generate() with the ->drbg_mutex being held, it must not attempt to recursively grab it once again. Remove the corresponding mutex operations from what is now drbg_seed_from_random(). Furthermore, as drbg_seed_from_random() can now report errors directly to its caller, there's no need for it to temporarily switch the DRBG's ->seeded state to DRBG_SEED_STATE_UNSEEDED so that a failure of the subsequently invoked __drbg_seed() will get signaled to drbg_generate(). Don't do it then. Signed-off-by: Nicolai Stange Signed-off-by: Herbert Xu --- include/crypto/drbg.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 3ebdb1effe74..a6c3b8e7deb6 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -137,12 +137,10 @@ struct drbg_state { bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ - struct work_struct seed_work; /* asynchronous seeding support */ struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; - struct random_ready_callback random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) -- cgit v1.2.3 From 8ea5ee00beb925d2aa0fed0eb3faf04715a3f2bd Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 15 Nov 2021 15:18:09 +0100 Subject: crypto: drbg - reseed 'nopr' drbgs periodically from get_random_bytes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In contrast to the fully prediction resistant 'pr' DRBGs, the 'nopr' variants get seeded once at boot and reseeded only rarely thereafter, namely only after 2^20 requests have been served each. AFAICT, this reseeding based on the number of requests served is primarily motivated by information theoretic considerations, c.f. NIST SP800-90Ar1, sec. 8.6.8 ("Reseeding"). However, given the relatively large seed lifetime of 2^20 requests, the 'nopr' DRBGs can hardly be considered to provide any prediction resistance whatsoever, i.e. to protect against threats like side channel leaks of the internal DRBG state (think e.g. leaked VM snapshots). This is expected and completely in line with the 'nopr' naming, but as e.g. the "drbg_nopr_hmac_sha512" implementation is potentially being used for providing the "stdrng" and thus, the crypto_default_rng serving the in-kernel crypto, it would certainly be desirable to achieve at least the same level of prediction resistance as get_random_bytes() does. Note that the chacha20 rngs underlying get_random_bytes() get reseeded every CRNG_RESEED_INTERVAL == 5min: the secondary, per-NUMA node rngs from the primary one and the primary rng in turn from the entropy pool, provided sufficient entropy is available. The 'nopr' DRBGs do draw randomness from get_random_bytes() for their initial seed already, so making them to reseed themselves periodically from get_random_bytes() in order to let them benefit from the latter's prediction resistance is not such a big change conceptually. In principle, it would have been also possible to make the 'nopr' DRBGs to periodically invoke a full reseeding operation, i.e. to also consider the jitterentropy source (if enabled) in addition to get_random_bytes() for the seed value. However, get_random_bytes() is relatively lightweight as compared to the jitterentropy generation process and thus, even though the 'nopr' reseeding is supposed to get invoked infrequently, it's IMO still worthwhile to avoid occasional latency spikes for drbg_generate() and stick to get_random_bytes() only. As an additional remark, note that drawing randomness from the non-SP800-90B-conforming get_random_bytes() only won't adversely affect SP800-90A conformance either: the very same is being done during boot via drbg_seed_from_random() already once rng_is_initialized() flips to true and it follows that if the DRBG implementation does conform to SP800-90A now, it will continue to do so. Make the 'nopr' DRBGs to reseed themselves periodically from get_random_bytes() every CRNG_RESEED_INTERVAL == 5min. More specifically, introduce a new member ->last_seed_time to struct drbg_state for recording in units of jiffies when the last seeding operation had taken place. Make __drbg_seed() maintain it and let drbg_generate() invoke a reseed from get_random_bytes() via drbg_seed_from_random() if more than 5min have passed by since the last seeding operation. Be careful to not to reseed if in testing mode though, or otherwise the drbg related tests in crypto/testmgr.c would fail to reproduce the expected output. In order to keep the formatting clean in drbg_generate() wrap the logic for deciding whether or not a reseed is due in a new helper, drbg_nopr_reseed_interval_elapsed(). Signed-off-by: Nicolai Stange Reviewed-by: Stephan Müller Signed-off-by: Herbert Xu --- include/crypto/drbg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index a6c3b8e7deb6..af5ad51d3eef 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -134,6 +134,7 @@ struct drbg_state { struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ enum drbg_seed_state seeded; /* DRBG fully seeded? */ + unsigned long last_seed_time; bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ -- cgit v1.2.3 From b808f32023dd8127b0fa27f60fa69a959fd70388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Fri, 19 Nov 2021 07:55:33 +0100 Subject: crypto: kdf - Add key derivation self-test support code As a preparation to add the key derivation implementations, the self-test data structure definition and the common test code is made available. The test framework follows the testing applied by the NIST CAVP test approach. The structure of the test code follows the implementations found in crypto/testmgr.c|h. In case the KDF implementations will be made available via a kernel crypto API templates, the test code is intended to be merged into testmgr.c|h. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- include/crypto/internal/kdf_selftest.h | 71 ++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 include/crypto/internal/kdf_selftest.h (limited to 'include') diff --git a/include/crypto/internal/kdf_selftest.h b/include/crypto/internal/kdf_selftest.h new file mode 100644 index 000000000000..4d03d2af57b7 --- /dev/null +++ b/include/crypto/internal/kdf_selftest.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2021, Stephan Mueller + */ + +#ifndef _CRYPTO_KDF_SELFTEST_H +#define _CRYPTO_KDF_SELFTEST_H + +#include +#include + +struct kdf_testvec { + unsigned char *key; + size_t keylen; + unsigned char *ikm; + size_t ikmlen; + struct kvec info; + unsigned char *expected; + size_t expectedlen; +}; + +static inline int +kdf_test(const struct kdf_testvec *test, const char *name, + int (*crypto_kdf_setkey)(struct crypto_shash *kmd, + const u8 *key, size_t keylen, + const u8 *ikm, size_t ikmlen), + int (*crypto_kdf_generate)(struct crypto_shash *kmd, + const struct kvec *info, + unsigned int info_nvec, + u8 *dst, unsigned int dlen)) +{ + struct crypto_shash *kmd; + int ret; + u8 *buf = kzalloc(test->expectedlen, GFP_KERNEL); + + if (!buf) + return -ENOMEM; + + kmd = crypto_alloc_shash(name, 0, 0); + if (IS_ERR(kmd)) { + pr_err("alg: kdf: could not allocate hash handle for %s\n", + name); + kfree(buf); + return -ENOMEM; + } + + ret = crypto_kdf_setkey(kmd, test->key, test->keylen, + test->ikm, test->ikmlen); + if (ret) { + pr_err("alg: kdf: could not set key derivation key\n"); + goto err; + } + + ret = crypto_kdf_generate(kmd, &test->info, 1, buf, test->expectedlen); + if (ret) { + pr_err("alg: kdf: could not obtain key data\n"); + goto err; + } + + ret = memcmp(test->expected, buf, test->expectedlen); + if (ret) + ret = -EINVAL; + +err: + crypto_free_shash(kmd); + kfree(buf); + return ret; +} + +#endif /* _CRYPTO_KDF_SELFTEST_H */ -- cgit v1.2.3 From 026a733e66592e743a0905c7fd6b5d3bf89b2d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Fri, 19 Nov 2021 07:55:58 +0100 Subject: crypto: kdf - add SP800-108 counter key derivation function SP800-108 defines three KDFs - this patch provides the counter KDF implementation. The KDF is implemented as a service function where the caller has to maintain the hash / HMAC state. Apart from this hash/HMAC state, no additional state is required to be maintained by either the caller or the KDF implementation. The key for the KDF is set with the crypto_kdf108_setkey function which is intended to be invoked before the caller requests a key derivation operation via crypto_kdf108_ctr_generate. SP800-108 allows the use of either a HMAC or a hash as crypto primitive for the KDF. When a HMAC primtive is intended to be used, crypto_kdf108_setkey must be used to set the HMAC key. Otherwise, for a hash crypto primitve crypto_kdf108_ctr_generate can be used immediately after allocating the hash handle. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- include/crypto/kdf_sp800108.h | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 include/crypto/kdf_sp800108.h (limited to 'include') diff --git a/include/crypto/kdf_sp800108.h b/include/crypto/kdf_sp800108.h new file mode 100644 index 000000000000..b7b20a778fb7 --- /dev/null +++ b/include/crypto/kdf_sp800108.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2021, Stephan Mueller + */ + +#ifndef _CRYPTO_KDF108_H +#define _CRYPTO_KDF108_H + +#include +#include + +/** + * Counter KDF generate operation according to SP800-108 section 5.1 + * as well as SP800-56A section 5.8.1 (Single-step KDF). + * + * @kmd Keyed message digest whose key was set with crypto_kdf108_setkey or + * unkeyed message digest + * @info optional context and application specific information - this may be + * NULL + * @info_vec number of optional context/application specific information entries + * @dst destination buffer that the caller already allocated + * @dlen length of the destination buffer - the KDF derives that amount of + * bytes. + * + * To comply with SP800-108, the caller must provide Label || 0x00 || Context + * in the info parameter. + * + * @return 0 on success, < 0 on error + */ +int crypto_kdf108_ctr_generate(struct crypto_shash *kmd, + const struct kvec *info, unsigned int info_nvec, + u8 *dst, unsigned int dlen); + +/** + * Counter KDF setkey operation + * + * @kmd Keyed message digest allocated by the caller. The key should not have + * been set. + * @key Seed key to be used to initialize the keyed message digest context. + * @keylen This length of the key buffer. + * @ikm The SP800-108 KDF does not support IKM - this parameter must be NULL + * @ikmlen This parameter must be 0. + * + * According to SP800-108 section 7.2, the seed key must be at least as large as + * the message digest size of the used keyed message digest. This limitation + * is enforced by the implementation. + * + * SP800-108 allows the use of either a HMAC or a hash primitive. When + * the caller intends to use a hash primitive, the call to + * crypto_kdf108_setkey is not required and the key derivation operation can + * immediately performed using crypto_kdf108_ctr_generate after allocating + * a handle. + * + * @return 0 on success, < 0 on error + */ +int crypto_kdf108_setkey(struct crypto_shash *kmd, + const u8 *key, size_t keylen, + const u8 *ikm, size_t ikmlen); + +#endif /* _CRYPTO_KDF108_H */ -- cgit v1.2.3 From d787a3e38f01bfc4566df4e85d432a29d192e637 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 12 Nov 2021 12:22:23 +0100 Subject: mac80211: add support for .ndo_fill_forward_path This allows drivers to provide a destination device + info for flow offload Only supported in combination with 802.3 encap offload Signed-off-by: Felix Fietkau Tested-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/20211112112223.1209-1-nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 775dbb982654..10e6fe215f0f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3952,6 +3952,8 @@ struct ieee80211_prep_tx_info { * radar channel. * The caller is expected to set chandef pointer to NULL in order to * disable offchannel CAC/radar detection. + * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to + * resolve a path for hardware flow offloading */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4282,6 +4284,11 @@ struct ieee80211_ops { struct ieee80211_sta *sta, u8 flowid); int (*set_radar_offchan)(struct ieee80211_hw *hw, struct cfg80211_chan_def *chandef); + int (*net_fill_forward_path)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct net_device_path_ctx *ctx, + struct net_device_path *path); }; /** -- cgit v1.2.3 From c47240cb46a10c40686ce4e25c64aaed676f71c9 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 16 Nov 2021 12:41:52 +0100 Subject: cfg80211: schedule offchan_cac_abort_wk in cfg80211_radar_event If necessary schedule offchan_cac_abort_wk work in cfg80211_radar_event routine adding offchan parameter to cfg80211_radar_event signature. Rename cfg80211_radar_event in __cfg80211_radar_event and introduce the two following inline helpers: - cfg80211_radar_event - cfg80211_offchan_radar_event Doing so the drv will not need to run cfg80211_offchan_cac_abort() after radar detection on the offchannel chain. Tested-by: Owen Peng Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/3ff583e021e3343a3ced54a7b09b5e184d1880dc.1637062727.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 362da9f6bf39..a887086cb103 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7605,15 +7605,33 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp); /** - * cfg80211_radar_event - radar detection event + * __cfg80211_radar_event - radar detection event * @wiphy: the wiphy * @chandef: chandef for the current channel + * @offchan: the radar has been detected on the offchannel chain * @gfp: context flags * * This function is called when a radar is detected on the current chanenl. */ -void cfg80211_radar_event(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, gfp_t gfp); +void __cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + bool offchan, gfp_t gfp); + +static inline void +cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, false, gfp); +} + +static inline void +cfg80211_offchan_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, true, gfp); +} /** * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event -- cgit v1.2.3 From fb5f6a0e8063b7a84d6d44ef353846ccd7708d2e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 18 Nov 2021 12:38:39 -0800 Subject: mac80211: Use memset_after() to clear tx status In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memset(), avoid intentionally writing across neighboring fields. Use memset_after() so memset() doesn't get confused about writing beyond the destination member that is intended to be the starting point of zeroing through the end of the struct. Additionally fix the common helper, ieee80211_tx_info_clear_status(), which was not clearing ack_signal, but the open-coded versions did. Johannes Berg points out this bug was introduced by commit e3e1a0bcb3f1 ("mac80211: reduce IEEE80211_TX_MAX_RATES") but was harmless. Also drops the associated unneeded BUILD_BUG_ON()s, and adds a note to carl9170 about usage. Signed-off-by: Kees Cook Tested-by: Christian Lamparter [both CARL9170+P54USB on real HW] Link: https://lore.kernel.org/r/20211118203839.1289276-1-keescook@chromium.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 10e6fe215f0f..e349f57d19ae 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1205,12 +1205,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) /* clear the rate counts */ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) info->status.rates[i].count = 0; - - BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); - memset(&info->status.ampdu_ack_len, 0, - sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); + memset_after(&info->status, 0, rates); } -- cgit v1.2.3 From a0f84dfb3f6d9f78f862cbe885036d3e4449fc6f Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Thu, 25 Nov 2021 12:15:19 +0200 Subject: ASoC: SOF: IPC: dai: Expand DAI_CONFIG IPC flags Some DAI components, such as HDaudio, need to be stopped in two steps a) stop the DAI component b) stop the DAI DMA This patch enables this two-step stop by expanding the DAI_CONFIG IPC flags and split them into 2 parts. The 4 LSB bits indicate when the DAI_CONFIG IPC is sent, ex: hw_params, hw_free or pause. The 4 MSB bits are used as the quirk flags to be used along with the command flags. The quirk flag called SOF_DAI_CONFIG_FLAGS_2_STEP_STOP shall be set along with the HW_PARAMS command flag, i.e. before the pipeline is started so that the stop/pause trigger op in the FW can take the appropriate action to either perform/skip the DMA stop. If set, the DMA stop will be executed when the DAI_CONFIG IPC is sent during hw_free. In the case of pause, DMA pause will be handled when the DAI_CONFIG IPC is sent with the PAUSE command flag. Along with this, modify the signature for the hda_ctrl_dai_widget_setup/ hda_ctrl_dai_widget_free() functions to take additional flags as an argument and modify all users to pass the appropriate quirk flags. Only the HDA DAI's need to pass the SOF_DAI_CONFIG_FLAGS_2_STEP_STOP quirk flag during hw_params to indicate that it supports two-step stop and pause. Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20211125101520.291581-10-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/dai.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 5132bc60f54b..59ee50ac7705 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -52,12 +52,25 @@ #define SOF_DAI_FMT_INV_MASK 0x0f00 #define SOF_DAI_FMT_CLOCK_PROVIDER_MASK 0xf000 -/* DAI_CONFIG flags */ -#define SOF_DAI_CONFIG_FLAGS_MASK 0x3 -#define SOF_DAI_CONFIG_FLAGS_NONE (0 << 0) /**< DAI_CONFIG sent without stage information */ -#define SOF_DAI_CONFIG_FLAGS_HW_PARAMS (1 << 0) /**< DAI_CONFIG sent during hw_params stage */ -#define SOF_DAI_CONFIG_FLAGS_HW_FREE (2 << 0) /**< DAI_CONFIG sent during hw_free stage */ -#define SOF_DAI_CONFIG_FLAGS_RFU (3 << 0) /**< not used, reserved for future use */ +/* + * DAI_CONFIG flags. The 4 LSB bits are used for the commands, HW_PARAMS, HW_FREE and PAUSE + * representing when the IPC is sent. The 4 MSB bits are used to add quirks along with the above + * commands. + */ +#define SOF_DAI_CONFIG_FLAGS_CMD_MASK 0xF +#define SOF_DAI_CONFIG_FLAGS_NONE 0 /**< DAI_CONFIG sent without stage information */ +#define SOF_DAI_CONFIG_FLAGS_HW_PARAMS BIT(0) /**< DAI_CONFIG sent during hw_params stage */ +#define SOF_DAI_CONFIG_FLAGS_HW_FREE BIT(1) /**< DAI_CONFIG sent during hw_free stage */ +/**< DAI_CONFIG sent during pause trigger. Only available ABI 3.20 onwards */ +#define SOF_DAI_CONFIG_FLAGS_PAUSE BIT(2) +#define SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT 4 +#define SOF_DAI_CONFIG_FLAGS_QUIRK_MASK (0xF << SOF_DAI_CONFIG_FLAGS_QUIRK_SHIFT) +/* + * This should be used along with the SOF_DAI_CONFIG_FLAGS_HW_PARAMS to indicate that pipeline + * stop/pause and DAI DMA stop/pause should happen in two steps. This change is only available + * ABI 3.20 onwards. + */ +#define SOF_DAI_CONFIG_FLAGS_2_STEP_STOP BIT(0) /** \brief Types of DAI */ enum sof_ipc_dai_type { -- cgit v1.2.3 From b4c80629c5c9d48880c5ad99943374f9ab72432e Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 23 May 2021 22:49:57 +0200 Subject: include/linux/byteorder/generic.h: fix index variables In cpu_to_be32_array() and be32_to_cpu_array() the length of the array is given by variable len of type size_t. An index variable of type int is used to iterate over the array. This is bound to fail for len > INT_MAX and lets GCC add instructions for sign extension. Correct the type of the index variable. Signed-off-by: Heinrich Schuchardt Link: https://lore.kernel.org/r/20210523204958.64575-1-xypron.glpk@gmx.de Signed-off-by: Greg Kroah-Hartman --- include/linux/byteorder/generic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 4b13e0a3e15b..c9a4c96c9943 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -190,7 +190,7 @@ static inline void be64_add_cpu(__be64 *var, u64 val) static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) { - int i; + size_t i; for (i = 0; i < len; i++) dst[i] = cpu_to_be32(src[i]); @@ -198,7 +198,7 @@ static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len) { - int i; + size_t i; for (i = 0; i < len; i++) dst[i] = be32_to_cpu(src[i]); -- cgit v1.2.3 From 18e6c0751cf9ae0631a2623e31af2bf504f72c30 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:49 +0100 Subject: tty: finish kernel-doc of tty_struct members There are already pieces of kernel-doc documentation for struct tty_struct in tty.h. Finish the documentation for the members which were undocumented yet. It also includes tuning the already existing pieces like flow and ctrl, especially adding highlights to them. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 79 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index 5dbd7c5afac7..da49ad9be281 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -122,33 +122,84 @@ struct tty_operations; /** * struct tty_struct - state associated with a tty while open * - * @flow.lock: lock for flow members - * @flow.stopped: tty stopped/started by tty_stop/tty_start - * @flow.tco_stopped: tty stopped/started by TCOOFF/TCOON ioctls (it has - * precedense over @flow.stopped) + * @magic: magic value set early in @alloc_tty_struct to %TTY_MAGIC, for + * debugging purposes + * @kref: reference counting by tty_kref_get() and tty_kref_put(), reaching zero + * frees the structure + * @dev: class device or %NULL (e.g. ptys, serdev) + * @driver: &struct tty_driver operating this tty + * @ops: &struct tty_operations of @driver for this tty (open, close, etc.) + * @index: index of this tty (e.g. to construct @name like tty12) + * @ldisc_sem: protects line discipline changes (@ldisc) -- lock tty not pty + * @ldisc: the current line discipline for this tty (n_tty by default) + * @atomic_write_lock: protects against concurrent writers, i.e. locks + * @write_cnt, @write_buf and similar + * @legacy_mutex: leftover from history (BKL -> BTM -> @legacy_mutex), + * protecting several operations on this tty + * @throttle_mutex: protects against concurrent tty_throttle_safe() and + * tty_unthrottle_safe() (but not tty_unthrottle()) + * @termios_rwsem: protects @termios and @termios_locked + * @winsize_mutex: protects @winsize + * @termios: termios for the current tty, copied from/to @driver.termios + * @termios_locked: locked termios (by %TIOCGLCKTRMIOS and %TIOCSLCKTRMIOS + * ioctls) + * @name: name of the tty constructed by tty_line_name() (e.g. ttyS3) + * @flags: bitwise OR of %TTY_THROTTLED, %TTY_IO_ERROR, ... + * @count: count of open processes, reaching zero cancels all the work for + * this tty and drops a @kref too (but does not free this tty) + * @winsize: size of the terminal "window" (cf. @winsize_mutex) + * @flow: flow settings grouped together, see also @flow.unused + * @flow.lock: lock for @flow members + * @flow.stopped: tty stopped/started by stop_tty()/start_tty() + * @flow.tco_stopped: tty stopped/started by %TCOOFF/%TCOON ioctls (it has + * precedence over @flow.stopped) * @flow.unused: alignment for Alpha, so that no members other than @flow.* are * modified by the same 64b word store. The @flow's __aligned is * there for the very same reason. - * @ctrl.lock: lock for ctrl members + * @ctrl: control settings grouped together, see also @ctrl.unused + * @ctrl.lock: lock for @ctrl members * @ctrl.pgrp: process group of this tty (setpgrp(2)) * @ctrl.session: session of this tty (setsid(2)). Writes are protected by both - * @ctrl.lock and legacy mutex, readers must use at least one of + * @ctrl.lock and @legacy_mutex, readers must use at least one of * them. - * @ctrl.pktstatus: packet mode status (bitwise OR of TIOCPKT_* constants) + * @ctrl.pktstatus: packet mode status (bitwise OR of %TIOCPKT_ constants) * @ctrl.packet: packet mode enabled + * @ctrl.unused: alignment for Alpha, see @flow.unused for explanation + * @hw_stopped: not controlled by the tty layer, under @driver's control for CTS + * handling + * @receive_room: bytes permitted to feed to @ldisc without any being lost + * @flow_change: controls behavior of throttling, see tty_throttle_safe() and + * tty_unthrottle_safe() + * @link: link to another pty (master -> slave and vice versa) + * @fasync: state for %O_ASYNC (for %SIGIO); managed by fasync_helper() + * @write_wait: concurrent writers are waiting in this queue until they are + * allowed to write + * @read_wait: readers wait for data in this queue + * @hangup_work: normally a work to perform a hangup (do_tty_hangup()); while + * freeing the tty, (re)used to release_one_tty() + * @disc_data: pointer to @ldisc's private data (e.g. to &struct n_tty_data) + * @driver_data: pointer to @driver's private data (e.g. &struct uart_state) + * @files_lock: protects @tty_files list + * @tty_files: list of (re)openers of this tty (i.e. linked &struct + * tty_file_private) + * @closing: when set during close, n_tty processes only START & STOP chars + * @write_buf: temporary buffer used during tty_write() to copy user data to + * @write_cnt: count of bytes written in tty_write() to @write_buf + * @SAK_work: if the tty has a pending do_SAK, it is queued here + * @port: persistent storage for this device (i.e. &struct tty_port) * * All of the state associated with a tty while the tty is open. Persistent - * storage for tty devices is referenced here as @port in struct tty_port. + * storage for tty devices is referenced here as @port and is documented in + * &struct tty_port. */ struct tty_struct { int magic; struct kref kref; - struct device *dev; /* class device or NULL (e.g. ptys, serdev) */ + struct device *dev; struct tty_driver *driver; const struct tty_operations *ops; int index; - /* Protects ldisc changes: Lock tty not pty */ struct ld_semaphore ldisc_sem; struct tty_ldisc *ldisc; @@ -157,12 +208,11 @@ struct tty_struct { struct mutex throttle_mutex; struct rw_semaphore termios_rwsem; struct mutex winsize_mutex; - /* Termios values are protected by the termios rwsem */ struct ktermios termios, termios_locked; char name[64]; unsigned long flags; int count; - struct winsize winsize; /* winsize_mutex */ + struct winsize winsize; struct { spinlock_t lock; @@ -181,7 +231,7 @@ struct tty_struct { } __aligned(sizeof(unsigned long)) ctrl; int hw_stopped; - unsigned int receive_room; /* Bytes free for queue */ + unsigned int receive_room; int flow_change; struct tty_struct *link; @@ -191,7 +241,7 @@ struct tty_struct { struct work_struct hangup_work; void *disc_data; void *driver_data; - spinlock_t files_lock; /* protects tty_files list */ + spinlock_t files_lock; struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 @@ -199,7 +249,6 @@ struct tty_struct { int closing; unsigned char *write_buf; int write_cnt; - /* If the tty has a pending do_SAK, queue it here - akpm */ struct work_struct SAK_work; struct tty_port *port; } __randomize_layout; -- cgit v1.2.3 From 61c83addb77c65f498a8db0e7113dc3acf753c45 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:50 +0100 Subject: tty: add kernel-doc for tty_port tty_port used to have only short comments along its members. Convert them into proper kernel-doc comments in front of the structure. And add some more explanation to them where needed. The whole structure purpose and handling is documented at the end too -- some pieces of preexisting text moved to this place. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-3-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 104 +++++++++++++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 6e86e9e118b6..9091e1c8de4c 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -7,17 +7,6 @@ #include #include -/* - * Port level information. Each device keeps its own port level information - * so provide a common structure for those ports wanting to use common support - * routines. - * - * The tty port has a different lifetime to the tty so must be kept apart. - * In addition be careful as tty -> port mappings are valid for the life - * of the tty object but in many cases port -> tty mappings are valid only - * until a hangup so don't use the wrong path. - */ - struct attribute_group; struct tty_driver; struct tty_port; @@ -48,30 +37,77 @@ struct tty_port_client_operations { extern const struct tty_port_client_operations tty_port_default_client_ops; +/** + * struct tty_port -- port level information + * + * @buf: buffer for this port, locked internally + * @tty: back pointer to &struct tty_struct, valid only if the tty is open. Use + * tty_port_tty_get() to obtain it (and tty_kref_put() to release). + * @itty: internal back pointer to &struct tty_struct. Avoid this. It should be + * eliminated in the long term. + * @ops: tty port operations (like activate, shutdown), see &struct + * tty_port_operations + * @client_ops: tty port client operations (like receive_buf, write_wakeup). + * By default, tty_port_default_client_ops is used. + * @lock: lock protecting @tty + * @blocked_open: # of procs waiting for open in tty_port_block_til_ready() + * @count: usage count + * @open_wait: open waiters queue (waiting e.g. for a carrier) + * @delta_msr_wait: modem status change queue (waiting for MSR changes) + * @flags: user TTY flags (%ASYNC_) + * @iflags: internal flags (%TTY_PORT_) + * @console: when set, the port is a console + * @mutex: locking, for open, shutdown and other port operations + * @buf_mutex: @xmit_buf alloc lock + * @xmit_buf: optional xmit buffer used by some drivers + * @close_delay: delay in jiffies to wait when closing the port + * @closing_wait: delay in jiffies for output to be sent before closing + * @drain_delay: set to zero if no pure time based drain is needed else set to + * size of fifo + * @kref: references counter. Reaching zero calls @ops->destruct() if non-%NULL + * or frees the port otherwise. + * @client_data: pointer to private data, for @client_ops + * + * Each device keeps its own port level information. &struct tty_port was + * introduced as a common structure for such information. As every TTY device + * shall have a backing tty_port structure, every driver can use these members. + * + * The tty port has a different lifetime to the tty so must be kept apart. + * In addition be careful as tty -> port mappings are valid for the life + * of the tty object but in many cases port -> tty mappings are valid only + * until a hangup so don't use the wrong path. + * + * Tty port shall be initialized by tty_port_init() and shut down either by + * tty_port_destroy() (refcounting not used), or tty_port_put() (refcounting). + * + * There is a lot of helpers around &struct tty_port too. To name the most + * significant ones: tty_port_open(), tty_port_close() (or + * tty_port_close_start() and tty_port_close_end() separately if need be), and + * tty_port_hangup(). These call @ops->activate() and @ops->shutdown() as + * needed. + */ struct tty_port { - struct tty_bufhead buf; /* Locked internally */ - struct tty_struct *tty; /* Back pointer */ - struct tty_struct *itty; /* internal back ptr */ - const struct tty_port_operations *ops; /* Port operations */ - const struct tty_port_client_operations *client_ops; /* Port client operations */ - spinlock_t lock; /* Lock protecting tty field */ - int blocked_open; /* Waiting to open */ - int count; /* Usage count */ - wait_queue_head_t open_wait; /* Open waiters */ - wait_queue_head_t delta_msr_wait; /* Modem status change */ - unsigned long flags; /* User TTY flags ASYNC_ */ - unsigned long iflags; /* Internal flags TTY_PORT_ */ - unsigned char console:1; /* port is a console */ - struct mutex mutex; /* Locking */ - struct mutex buf_mutex; /* Buffer alloc lock */ - unsigned char *xmit_buf; /* Optional buffer */ - unsigned int close_delay; /* Close port delay */ - unsigned int closing_wait; /* Delay for output */ - int drain_delay; /* Set to zero if no pure time - based drain is needed else - set to size of fifo */ - struct kref kref; /* Ref counter */ - void *client_data; + struct tty_bufhead buf; + struct tty_struct *tty; + struct tty_struct *itty; + const struct tty_port_operations *ops; + const struct tty_port_client_operations *client_ops; + spinlock_t lock; + int blocked_open; + int count; + wait_queue_head_t open_wait; + wait_queue_head_t delta_msr_wait; + unsigned long flags; + unsigned long iflags; + unsigned char console:1; + struct mutex mutex; + struct mutex buf_mutex; + unsigned char *xmit_buf; + unsigned int close_delay; + unsigned int closing_wait; + int drain_delay; + struct kref kref; + void *client_data; }; /* tty_port::iflags bits -- use atomic bit ops */ -- cgit v1.2.3 From a6563830215226aae0e7e6802955c77a6a7b7547 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:51 +0100 Subject: tty: add kernel-doc for tty_driver tty_driver used to have only short comments along its members. Convert them into proper kernel-doc comments in front of the structure. And add some more explanation to them where needed. The whole structure handling is documented at the end too. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-4-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 62 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 795b94ccdeb6..3622404a678d 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -291,23 +291,61 @@ struct tty_operations { int (*proc_show)(struct seq_file *, void *); } __randomize_layout; +/** + * struct tty_driver -- driver for TTY devices + * + * @magic: set to %TTY_DRIVER_MAGIC in __tty_alloc_driver() + * @kref: reference counting. Reaching zero frees all the internals and the + * driver. + * @cdevs: allocated/registered character /dev devices + * @owner: modules owning this driver. Used drivers cannot be rmmod'ed. + * Automatically set by tty_alloc_driver(). + * @driver_name: name of the driver used in /proc/tty + * @name: used for constructing /dev node name + * @name_base: used as a number base for constructing /dev node name + * @major: major /dev device number (zero for autoassignment) + * @minor_start: the first minor /dev device number + * @num: number of devices allocated + * @type: type of tty driver (%TTY_DRIVER_TYPE_) + * @subtype: subtype of tty driver (%SYSTEM_TYPE_, %PTY_TYPE_, %SERIAL_TYPE_) + * @init_termios: termios to set to each tty initially (e.g. %tty_std_termios) + * @flags: tty driver flags (%TTY_DRIVER_) + * @proc_entry: proc fs entry, used internally + * @other: driver of the linked tty; only used for the PTY driver + * @ttys: array of active &struct tty_struct, set by tty_standard_install() + * @ports: array of &struct tty_port; can be set during initialization by + * tty_port_link_device() and similar + * @termios: storage for termios at each TTY close for the next open + * @driver_state: pointer to driver's arbitrary data + * @ops: driver hooks for TTYs. Set them using tty_set_operations(). Use &struct + * tty_port helpers in them as much as possible. + * @tty_drivers: used internally to link tty_drivers together + * + * The usual handling of &struct tty_driver is to allocate it by + * tty_alloc_driver(), set up all the necessary members, and register it by + * tty_register_driver(). At last, the driver is torn down by calling + * tty_unregister_driver() followed by tty_driver_kref_put(). + * + * The fields required to be set before calling tty_register_driver() include + * @driver_name, @name, @type, @subtype, @init_termios, and @ops. + */ struct tty_driver { - int magic; /* magic number for this structure */ - struct kref kref; /* Reference management */ + int magic; + struct kref kref; struct cdev **cdevs; struct module *owner; const char *driver_name; const char *name; - int name_base; /* offset of printed name */ - int major; /* major device number */ - int minor_start; /* start of minor device number */ - unsigned int num; /* number of devices allocated */ - short type; /* type of tty driver */ - short subtype; /* subtype of tty driver */ - struct ktermios init_termios; /* Initial termios */ - unsigned long flags; /* tty driver flags */ - struct proc_dir_entry *proc_entry; /* /proc fs entry */ - struct tty_driver *other; /* only used for the PTY driver */ + int name_base; + int major; + int minor_start; + unsigned int num; + short type; + short subtype; + struct ktermios init_termios; + unsigned long flags; + struct proc_dir_entry *proc_entry; + struct tty_driver *other; /* * Pointer to the tty data structures -- cgit v1.2.3 From 1fe183091753b1d7f11e70593700c0c0ef268db7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:52 +0100 Subject: tty: add kernel-doc for tty_operations tty_operations structure was already documented in a standalone comment in the header beginning. Move it right before the structure and reformat it so it complies to kernel-doc. That way, we can include it in Documentation/ later in this series. Note that we named proc_show's parameters, so that we can reference them in the text. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-5-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 398 +++++++++++++++++++++++++++------------------ 1 file changed, 241 insertions(+), 157 deletions(-) (limited to 'include') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 3622404a678d..5611992ab26a 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -2,248 +2,332 @@ #ifndef _LINUX_TTY_DRIVER_H #define _LINUX_TTY_DRIVER_H -/* - * This structure defines the interface between the low-level tty - * driver and the tty routines. The following routines can be - * defined; unless noted otherwise, they are optional, and can be - * filled in with a null pointer. +#include +#include +#include +#include +#include +#include +#include + +struct tty_struct; +struct tty_driver; +struct serial_icounter_struct; +struct serial_struct; + +/** + * struct tty_operations -- interface between driver and tty * - * struct tty_struct * (*lookup)(struct tty_driver *self, struct file *, int idx) + * @lookup: ``struct tty_struct *()(struct tty_driver *self, struct file *, + * int idx)`` * - * Return the tty device corresponding to idx, NULL if there is not - * one currently in use and an ERR_PTR value on error. Called under - * tty_mutex (for now!) + * Return the tty device corresponding to @idx, %NULL if there is not + * one currently in use and an %ERR_PTR value on error. Called under + * %tty_mutex (for now!) * - * Optional method. Default behaviour is to use the ttys array + * Optional method. Default behaviour is to use the @self->ttys array. * - * int (*install)(struct tty_driver *self, struct tty_struct *tty) + * @install: ``int ()(struct tty_driver *self, struct tty_struct *tty)`` * - * Install a new tty into the tty driver internal tables. Used in - * conjunction with lookup and remove methods. + * Install a new @tty into the @self's internal tables. Used in + * conjunction with @lookup and @remove methods. * - * Optional method. Default behaviour is to use the ttys array + * Optional method. Default behaviour is to use the @self->ttys array. * - * void (*remove)(struct tty_driver *self, struct tty_struct *tty) + * @remove: ``void ()(struct tty_driver *self, struct tty_struct *tty)`` * - * Remove a closed tty from the tty driver internal tables. Used in - * conjunction with lookup and remove methods. + * Remove a closed @tty from the @self's internal tables. Used in + * conjunction with @lookup and @remove methods. * - * Optional method. Default behaviour is to use the ttys array + * Optional method. Default behaviour is to use the @self->ttys array. * - * int (*open)(struct tty_struct * tty, struct file * filp); + * @open: ``int ()(struct tty_struct *tty, struct file *)`` * - * This routine is called when a particular tty device is opened. - * This routine is mandatory; if this routine is not filled in, - * the attempted open will fail with ENODEV. + * This routine is called when a particular @tty device is opened. This + * routine is mandatory; if this routine is not filled in, the attempted + * open will fail with %ENODEV. * * Required method. Called with tty lock held. * - * void (*close)(struct tty_struct * tty, struct file * filp); + * @close: ``void ()(struct tty_struct *tty, struct file *)`` * - * This routine is called when a particular tty device is closed. - * Note: called even if the corresponding open() failed. + * This routine is called when a particular @tty device is closed. + * + * Remark: called even if the corresponding @open() failed. * * Required method. Called with tty lock held. * - * void (*shutdown)(struct tty_struct * tty); + * @shutdown: ``void ()(struct tty_struct *tty)`` * - * This routine is called under the tty lock when a particular tty device - * is closed for the last time. It executes before the tty resources - * are freed so may execute while another function holds a tty kref. + * This routine is called under the tty lock when a particular @tty device + * is closed for the last time. It executes before the @tty resources + * are freed so may execute while another function holds a @tty kref. * - * void (*cleanup)(struct tty_struct * tty); + * @cleanup: ``void ()(struct tty_struct *tty)`` * - * This routine is called asynchronously when a particular tty device + * This routine is called asynchronously when a particular @tty device * is closed for the last time freeing up the resources. This is * actually the second part of shutdown for routines that might sleep. * + * @write: ``int ()(struct tty_struct *tty, const unsigned char *buf, + * int count)`` * - * int (*write)(struct tty_struct * tty, - * const unsigned char *buf, int count); - * - * This routine is called by the kernel to write a series of - * characters to the tty device. The characters may come from - * user space or kernel space. This routine will return the + * This routine is called by the kernel to write a series (@count) of + * characters (@buf) to the @tty device. The characters may come from + * user space or kernel space. This routine will return the * number of characters actually accepted for writing. * * Optional: Required for writable devices. * - * int (*put_char)(struct tty_struct *tty, unsigned char ch); + * @put_char: ``int ()(struct tty_struct *tty, unsigned char ch)`` * - * This routine is called by the kernel to write a single - * character to the tty device. If the kernel uses this routine, - * it must call the flush_chars() routine (if defined) when it is - * done stuffing characters into the driver. If there is no room - * in the queue, the character is ignored. + * This routine is called by the kernel to write a single character @ch to + * the @tty device. If the kernel uses this routine, it must call the + * @flush_chars() routine (if defined) when it is done stuffing characters + * into the driver. If there is no room in the queue, the character is + * ignored. * - * Optional: Kernel will use the write method if not provided. + * Optional: Kernel will use the @write method if not provided. Do not + * call this function directly, call tty_put_char(). * - * Note: Do not call this function directly, call tty_put_char + * @flush_chars: ``void ()(struct tty_struct *tty)`` * - * void (*flush_chars)(struct tty_struct *tty); + * This routine is called by the kernel after it has written a + * series of characters to the tty device using @put_char(). * - * This routine is called by the kernel after it has written a - * series of characters to the tty device using put_char(). + * Optional. Do not call this function directly, call + * tty_driver_flush_chars(). * - * Optional: + * @write_room: ``unsigned int ()(struct tty_struct *tty)`` * - * Note: Do not call this function directly, call tty_driver_flush_chars - * - * unsigned int (*write_room)(struct tty_struct *tty); - * - * This routine returns the numbers of characters the tty driver - * will accept for queuing to be written. This number is subject - * to change as output buffers get emptied, or if the output flow + * This routine returns the numbers of characters the @tty driver + * will accept for queuing to be written. This number is subject + * to change as output buffers get emptied, or if the output flow * control is acted. * - * Required if write method is provided else not needed. + * Required if @write method is provided else not needed. Do not call this + * function directly, call tty_write_room() * - * Note: Do not call this function directly, call tty_write_room - * - * int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); + * @chars_in_buffer: ``unsigned int ()(struct tty_struct *tty)`` * - * This routine allows the tty driver to implement - * device-specific ioctls. If the ioctl number passed in cmd - * is not recognized by the driver, it should return ENOIOCTLCMD. + * This routine returns the number of characters in the device private + * output queue. Used in tty_wait_until_sent() and for poll() + * implementation. * - * Optional + * Optional: if not provided, it is assumed there is no queue on the + * device. Do not call this function directly, call tty_chars_in_buffer(). * - * long (*compat_ioctl)(struct tty_struct *tty,, - * unsigned int cmd, unsigned long arg); + * @ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd, + * unsigned long arg)`` * - * implement ioctl processing for 32 bit process on 64 bit system + * This routine allows the @tty driver to implement device-specific + * ioctls. If the ioctl number passed in @cmd is not recognized by the + * driver, it should return %ENOIOCTLCMD. * - * Optional - * - * void (*set_termios)(struct tty_struct *tty, struct ktermios * old); + * Optional. * - * This routine allows the tty driver to be notified when - * device's termios settings have changed. + * @compat_ioctl: ``long ()(struct tty_struct *tty, unsigned int cmd, + * unsigned long arg)`` * - * Optional: Called under the termios lock + * Implement ioctl processing for 32 bit process on 64 bit system. * + * Optional. * - * void (*set_ldisc)(struct tty_struct *tty); + * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)`` * - * This routine allows the tty driver to be notified when the - * device's termios settings have changed. + * This routine allows the @tty driver to be notified when device's + * termios settings have changed. * - * Optional: Called under BKL (currently) - * - * void (*throttle)(struct tty_struct * tty); + * Optional: Called under the @tty->termios_rwsem. * - * This routine notifies the tty driver that input buffers for - * the line discipline are close to full, and it should somehow - * signal that no more characters should be sent to the tty. + * @set_ldisc: ``void ()(struct tty_struct *tty)`` * - * Optional: Always invoke via tty_throttle_safe(), called under the - * termios lock. - * - * void (*unthrottle)(struct tty_struct * tty); + * This routine allows the @tty driver to be notified when the device's + * line discipline is being changed. * - * This routine notifies the tty drivers that it should signals - * that characters can now be sent to the tty without fear of - * overrunning the input buffers of the line disciplines. - * - * Optional: Always invoke via tty_unthrottle(), called under the - * termios lock. + * Optional. Called under the @tty->ldisc_sem and @tty->termios_rwsem. * - * void (*stop)(struct tty_struct *tty); + * @throttle: ``void ()(struct tty_struct *tty)`` * - * This routine notifies the tty driver that it should stop - * outputting characters to the tty device. + * This routine notifies the @tty driver that input buffers for the line + * discipline are close to full, and it should somehow signal that no more + * characters should be sent to the @tty. * - * Called with ->flow.lock held. Serialized with start() method. + * Optional: Always invoke via tty_throttle_safe(). Called under the + * @tty->termios_rwsem. * - * Optional: + * @unthrottle: ``void ()(struct tty_struct *tty)`` * - * Note: Call stop_tty not this method. - * - * void (*start)(struct tty_struct *tty); + * This routine notifies the @tty driver that it should signal that + * characters can now be sent to the @tty without fear of overrunning the + * input buffers of the line disciplines. * - * This routine notifies the tty driver that it resume sending + * Optional. Always invoke via tty_unthrottle(). Called under the + * @tty->termios_rwsem. + * + * @stop: ``void ()(struct tty_struct *tty)`` + * + * This routine notifies the @tty driver that it should stop outputting * characters to the tty device. * - * Called with ->flow.lock held. Serialized with stop() method. + * Called with @tty->flow.lock held. Serialized with @start() method. * - * Optional: + * Optional. Always invoke via stop_tty(). * - * Note: Call start_tty not this method. - * - * void (*hangup)(struct tty_struct *tty); + * @start: ``void ()(struct tty_struct *tty)`` * - * This routine notifies the tty driver that it should hang up the - * tty device. + * This routine notifies the @tty driver that it resumed sending + * characters to the @tty device. * - * Optional: + * Called with @tty->flow.lock held. Serialized with stop() method. * - * Called with tty lock held. + * Optional. Always invoke via start_tty(). * - * int (*break_ctl)(struct tty_struct *tty, int state); + * @hangup: ``void ()(struct tty_struct *tty)`` * - * This optional routine requests the tty driver to turn on or - * off BREAK status on the RS-232 port. If state is -1, - * then the BREAK status should be turned on; if state is 0, then - * BREAK should be turned off. + * This routine notifies the @tty driver that it should hang up the @tty + * device. * - * If this routine is implemented, the high-level tty driver will - * handle the following ioctls: TCSBRK, TCSBRKP, TIOCSBRK, - * TIOCCBRK. + * Optional. Called with tty lock held. * - * If the driver sets TTY_DRIVER_HARDWARE_BREAK then the interface - * will also be called with actual times and the hardware is expected - * to do the delay work itself. 0 and -1 are still used for on/off. + * @break_ctl: ``int ()(struct tty_struct *tty, int state)`` * - * Optional: Required for TCSBRK/BRKP/etc handling. + * This optional routine requests the @tty driver to turn on or off BREAK + * status on the RS-232 port. If @state is -1, then the BREAK status + * should be turned on; if @state is 0, then BREAK should be turned off. * - * void (*wait_until_sent)(struct tty_struct *tty, int timeout); - * - * This routine waits until the device has written out all of the - * characters in its transmitter FIFO. + * If this routine is implemented, the high-level tty driver will handle + * the following ioctls: %TCSBRK, %TCSBRKP, %TIOCSBRK, %TIOCCBRK. + * + * If the driver sets %TTY_DRIVER_HARDWARE_BREAK in tty_alloc_driver(), + * then the interface will also be called with actual times and the + * hardware is expected to do the delay work itself. 0 and -1 are still + * used for on/off. + * + * Optional: Required for %TCSBRK/%BRKP/etc. handling. + * + * @flush_buffer: ``void ()(struct tty_struct *tty)`` + * + * This routine discards device private output buffer. Invoked on close, + * hangup, to implement %TCOFLUSH ioctl and similar. + * + * Optional: if not provided, it is assumed there is no queue on the + * device. Do not call this function directly, call + * tty_driver_flush_buffer(). + * + * @wait_until_sent: ``void ()(struct tty_struct *tty, int timeout)`` + * + * This routine waits until the device has written out all of the + * characters in its transmitter FIFO. Or until @timeout (in jiffies) is + * reached. + * + * Optional: If not provided, the device is assumed to have no FIFO. + * Usually correct to invoke via tty_wait_until_sent(). + * + * @send_xchar: ``void ()(struct tty_struct *tty, char ch)`` + * + * This routine is used to send a high-priority XON/XOFF character (@ch) + * to the @tty device. * - * Optional: If not provided the device is assumed to have no FIFO + * Optional: If not provided, then the @write method is called under + * the @tty->atomic_write_lock to keep it serialized with the ldisc. * - * Note: Usually correct to call tty_wait_until_sent + * @tiocmget: ``int ()(struct tty_struct *tty)`` * - * void (*send_xchar)(struct tty_struct *tty, char ch); + * This routine is used to obtain the modem status bits from the @tty + * driver. * - * This routine is used to send a high-priority XON/XOFF - * character to the device. + * Optional: If not provided, then %ENOTTY is returned from the %TIOCMGET + * ioctl. Do not call this function directly, call tty_tiocmget(). * - * Optional: If not provided then the write method is called under - * the atomic write lock to keep it serialized with the ldisc. + * @tiocmset: ``int ()(struct tty_struct *tty, + * unsigned int set, unsigned int clear)`` * - * int (*resize)(struct tty_struct *tty, struct winsize *ws) + * This routine is used to set the modem status bits to the @tty driver. + * First, @clear bits should be cleared, then @set bits set. * - * Called when a termios request is issued which changes the - * requested terminal geometry. + * Optional: If not provided, then %ENOTTY is returned from the %TIOCMSET + * ioctl. Do not call this function directly, call tty_tiocmset(). + * + * @resize: ``int ()(struct tty_struct *tty, struct winsize *ws)`` + * + * Called when a termios request is issued which changes the requested + * terminal geometry to @ws. * * Optional: the default action is to update the termios structure * without error. This is usually the correct behaviour. Drivers should - * not force errors here if they are not resizable objects (eg a serial + * not force errors here if they are not resizable objects (e.g. a serial * line). See tty_do_resize() if you need to wrap the standard method - * in your own logic - the usual case. + * in your own logic -- the usual case. + * + * @get_icount: ``int ()(struct tty_struct *tty, + * struct serial_icounter *icount)`` + * + * Called when the @tty device receives a %TIOCGICOUNT ioctl. Passed a + * kernel structure @icount to complete. + * + * Optional: called only if provided, otherwise %ENOTTY will be returned. + * + * @get_serial: ``int ()(struct tty_struct *tty, struct serial_struct *p)`` + * + * Called when the @tty device receives a %TIOCGSERIAL ioctl. Passed a + * kernel structure @p (&struct serial_struct) to complete. + * + * Optional: called only if provided, otherwise %ENOTTY will be returned. + * Do not call this function directly, call tty_tiocgserial(). + * + * @set_serial: ``int ()(struct tty_struct *tty, struct serial_struct *p)`` + * + * Called when the @tty device receives a %TIOCSSERIAL ioctl. Passed a + * kernel structure @p (&struct serial_struct) to set the values from. + * + * Optional: called only if provided, otherwise %ENOTTY will be returned. + * Do not call this function directly, call tty_tiocsserial(). + * + * @show_fdinfo: ``void ()(struct tty_struct *tty, struct seq_file *m)`` + * + * Called when the @tty device file descriptor receives a fdinfo request + * from VFS (to show in /proc//fdinfo/). @m should be filled with + * information. + * + * Optional: called only if provided, otherwise nothing is written to @m. + * Do not call this function directly, call tty_show_fdinfo(). + * + * @poll_init: ``int ()(struct tty_driver *driver, int line, char *options)`` + * + * kgdboc support (Documentation/dev-tools/kgdb.rst). This routine is + * called to initialize the HW for later use by calling @poll_get_char or + * @poll_put_char. + * + * Optional: called only if provided, otherwise skipped as a non-polling + * driver. + * + * @poll_get_char: ``int ()(struct tty_driver *driver, int line)`` + * + * kgdboc support (see @poll_init). @driver should read a character from a + * tty identified by @line and return it. + * + * Optional: called only if @poll_init provided. * - * int (*get_icount)(struct tty_struct *tty, struct serial_icounter *icount); + * @poll_put_char: ``void ()(struct tty_driver *driver, int line, char ch)`` * - * Called when the device receives a TIOCGICOUNT ioctl. Passed a kernel - * structure to complete. This method is optional and will only be called - * if provided (otherwise ENOTTY will be returned). + * kgdboc support (see @poll_init). @driver should write character @ch to + * a tty identified by @line. + * + * Optional: called only if @poll_init provided. + * + * @proc_show: ``int ()(struct seq_file *m, void *driver)`` + * + * Driver @driver (cast to &struct tty_driver) can show additional info in + * /proc/tty/driver/. It is enough to fill in the information + * into @m. + * + * Optional: called only if provided, otherwise no /proc entry created. + * + * This structure defines the interface between the low-level tty driver and + * the tty routines. These routines can be defined. Unless noted otherwise, + * they are optional, and can be filled in with a %NULL pointer. */ - -#include -#include -#include -#include -#include -#include -#include - -struct tty_struct; -struct tty_driver; -struct serial_icounter_struct; -struct serial_struct; - struct tty_operations { struct tty_struct * (*lookup)(struct tty_driver *driver, struct file *filp, int idx); @@ -288,7 +372,7 @@ struct tty_operations { int (*poll_get_char)(struct tty_driver *driver, int line); void (*poll_put_char)(struct tty_driver *driver, int line, char ch); #endif - int (*proc_show)(struct seq_file *, void *); + int (*proc_show)(struct seq_file *m, void *driver); } __randomize_layout; /** -- cgit v1.2.3 From 630bf86d15778fd3e5df17cb6e00839d0f44a707 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:53 +0100 Subject: tty: add kernel-doc for tty_port_operations tty_port_operations used to have only comments along its members. Convert them into proper kernel-doc comments in front of the structure. And add some more explanation to them where needed. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-6-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 9091e1c8de4c..d3ea9ed0b98e 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -12,21 +12,28 @@ struct tty_driver; struct tty_port; struct tty_struct; +/** + * struct tty_port_operations -- operations on tty_port + * @carrier_raised: return 1 if the carrier is raised on @port + * @dtr_rts: raise the DTR line if @raise is nonzero, otherwise lower DTR + * @shutdown: called when the last close completes or a hangup finishes IFF the + * port was initialized. Do not use to free resources. Turn off the device + * only. Called under the port mutex to serialize against @activate and + * @shutdown. + * @activate: called under the port mutex from tty_port_open(), serialized using + * the port mutex. Supposed to turn on the device. + * + * FIXME: long term getting the tty argument *out* of this would be good + * for consoles. + * + * @destruct: called on the final put of a port. Free resources, possibly incl. + * the port itself. + */ struct tty_port_operations { - /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); - /* Control the DTR line */ void (*dtr_rts)(struct tty_port *port, int raise); - /* Called when the last close completes or a hangup finishes - IFF the port was initialized. Do not use to free resources. Called - under the port mutex to serialize against activate/shutdowns */ void (*shutdown)(struct tty_port *port); - /* Called under the port mutex from tty_port_open, serialized using - the port mutex */ - /* FIXME: long term getting the tty argument *out* of this would be - good for consoles */ int (*activate)(struct tty_port *port, struct tty_struct *tty); - /* Called on the final put of a port */ void (*destruct)(struct tty_port *port); }; -- cgit v1.2.3 From 0c6119f9f7dc03a53bd35ca5a77926eef3c33d10 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:54 +0100 Subject: tty: add kernel-doc for tty_ldisc_ops tty_ldisc_ops structure was already partially documented in a standalone comment in the header beginning. Move it right before the structure and reformat it so it complies to kernel-doc. That way, we can include it in Documentation/ later in this series. And add the documentation for the members where missing too. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-7-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_ldisc.h | 259 +++++++++++++++++++++++----------------------- 1 file changed, 130 insertions(+), 129 deletions(-) (limited to 'include') diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 25f07017bbad..e0da0ba02de9 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -4,126 +4,6 @@ struct tty_struct; -/* - * This structure defines the interface between the tty line discipline - * implementation and the tty routines. The following routines can be - * defined; unless noted otherwise, they are optional, and can be - * filled in with a null pointer. - * - * int (*open)(struct tty_struct *); - * - * This function is called when the line discipline is associated - * with the tty. The line discipline can use this as an - * opportunity to initialize any state needed by the ldisc routines. - * - * void (*close)(struct tty_struct *); - * - * This function is called when the line discipline is being - * shutdown, either because the tty is being closed or because - * the tty is being changed to use a new line discipline - * - * void (*flush_buffer)(struct tty_struct *tty); - * - * This function instructs the line discipline to clear its - * buffers of any input characters it may have queued to be - * delivered to the user mode process. - * - * ssize_t (*read)(struct tty_struct * tty, struct file * file, - * unsigned char * buf, size_t nr); - * - * This function is called when the user requests to read from - * the tty. The line discipline will return whatever characters - * it has buffered up for the user. If this function is not - * defined, the user will receive an EIO error. - * - * ssize_t (*write)(struct tty_struct * tty, struct file * file, - * const unsigned char * buf, size_t nr); - * - * This function is called when the user requests to write to the - * tty. The line discipline will deliver the characters to the - * low-level tty device for transmission, optionally performing - * some processing on the characters first. If this function is - * not defined, the user will receive an EIO error. - * - * int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); - * - * This function is called when the user requests an ioctl which - * is not handled by the tty layer or the low-level tty driver. - * It is intended for ioctls which affect line discpline - * operation. Note that the search order for ioctls is (1) tty - * layer, (2) tty low-level driver, (3) line discpline. So a - * low-level driver can "grab" an ioctl request before the line - * discpline has a chance to see it. - * - * int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, - * unsigned long arg); - * - * Process ioctl calls from 32-bit process on 64-bit system - * - * NOTE: only ioctls that are neither "pointer to compatible - * structure" nor tty-generic. Something private that takes - * an integer or a pointer to wordsize-sensitive structure - * belongs here, but most of ldiscs will happily leave - * it NULL. - * - * void (*set_termios)(struct tty_struct *tty, struct ktermios * old); - * - * This function notifies the line discpline that a change has - * been made to the termios structure. - * - * int (*poll)(struct tty_struct * tty, struct file * file, - * poll_table *wait); - * - * This function is called when a user attempts to select/poll on a - * tty device. It is solely the responsibility of the line - * discipline to handle poll requests. - * - * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, - * char *fp, int count); - * - * This function is called by the low-level tty driver to send - * characters received by the hardware to the line discpline for - * processing. is a pointer to the buffer of input - * character received by the device. is a pointer to a - * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. may be NULL to indicate - * all data received is TTY_NORMAL. - * - * void (*write_wakeup)(struct tty_struct *); - * - * This function is called by the low-level tty driver to signal - * that line discpline should try to send more characters to the - * low-level driver for transmission. If the line discpline does - * not have any more data to send, it can just return. If the line - * discipline does have some data to send, please arise a tasklet - * or workqueue to do the real data transfer. Do not send data in - * this hook, it may leads to a deadlock. - * - * int (*hangup)(struct tty_struct *) - * - * Called on a hangup. Tells the discipline that it should - * cease I/O to the tty driver. Can sleep. The driver should - * seek to perform this action quickly but should wait until - * any pending driver I/O is completed. - * - * void (*dcd_change)(struct tty_struct *tty, unsigned int status) - * - * Tells the discipline that the DCD pin has changed its status. - * Used exclusively by the N_PPS (Pulse-Per-Second) line discipline. - * - * int (*receive_buf2)(struct tty_struct *, const unsigned char *cp, - * char *fp, int count); - * - * This function is called by the low-level tty driver to send - * characters received by the hardware to the line discpline for - * processing. is a pointer to the buffer of input - * character received by the device. is a pointer to a - * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. may be NULL to indicate - * all data received is TTY_NORMAL. - * If assigned, prefer this function for automatic flow control. - */ - #include #include #include @@ -175,7 +55,128 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, ldsem_down_write(sem, timeout) #endif - +/** + * struct tty_ldisc_ops - ldisc operations + * + * @name: name of this ldisc rendered in /proc/tty/ldiscs + * @num: ``N_*`` number (%N_TTY, %N_HDLC, ...) reserved to this ldisc + * + * @open: ``int ()(struct tty_struct *tty)`` + * + * This function is called when the line discipline is associated with the + * @tty. The line discipline can use this as an opportunity to initialize + * any state needed by the ldisc routines. + * + * @close: ``void ()(struct tty_struct *tty)`` + * + * This function is called when the line discipline is being shutdown, + * either because the @tty is being closed or because the @tty is being + * changed to use a new line discipline + * + * @flush_buffer: ``void ()(struct tty_struct *tty)`` + * + * This function instructs the line discipline to clear its buffers of any + * input characters it may have queued to be delivered to the user mode + * process. + * + * @read: ``ssize_t ()(struct tty_struct *tty, struct file *file, + * unsigned char *buf, size_t nr)`` + * + * This function is called when the user requests to read from the @tty. + * The line discipline will return whatever characters it has buffered up + * for the user. If this function is not defined, the user will receive + * an %EIO error. + * + * @write: ``ssize_t ()(struct tty_struct *tty, struct file *file, + * const unsigned char *buf, size_t nr)`` + * + * This function is called when the user requests to write to the @tty. + * The line discipline will deliver the characters to the low-level tty + * device for transmission, optionally performing some processing on the + * characters first. If this function is not defined, the user will + * receive an %EIO error. + * + * @ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd, + * unsigned long arg)`` + * + * This function is called when the user requests an ioctl which is not + * handled by the tty layer or the low-level tty driver. It is intended + * for ioctls which affect line discpline operation. Note that the search + * order for ioctls is (1) tty layer, (2) tty low-level driver, (3) line + * discpline. So a low-level driver can "grab" an ioctl request before + * the line discpline has a chance to see it. + * + * @compat_ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd, + * unsigned long arg)`` + * + * Process ioctl calls from 32-bit process on 64-bit system. + * + * Note that only ioctls that are neither "pointer to compatible + * structure" nor tty-generic. Something private that takes an integer or + * a pointer to wordsize-sensitive structure belongs here, but most of + * ldiscs will happily leave it %NULL. + * + * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)`` + * + * This function notifies the line discpline that a change has been made + * to the termios structure. + * + * @poll: ``int ()(struct tty_struct *tty, struct file *file, + * struct poll_table_struct *wait)`` + * + * This function is called when a user attempts to select/poll on a @tty + * device. It is solely the responsibility of the line discipline to + * handle poll requests. + * + * @hangup: ``void ()(struct tty_struct *tty)`` + * + * Called on a hangup. Tells the discipline that it should cease I/O to + * the tty driver. Can sleep. The driver should seek to perform this + * action quickly but should wait until any pending driver I/O is + * completed. + * + * @receive_buf: ``void ()(struct tty_struct *tty, const unsigned char *cp, + * const char *fp, int count)`` + * + * This function is called by the low-level tty driver to send characters + * received by the hardware to the line discpline for processing. @cp is + * a pointer to the buffer of input character received by the device. @fp + * is a pointer to an array of flag bytes which indicate whether a + * character was received with a parity error, etc. @fp may be %NULL to + * indicate all data received is %TTY_NORMAL. + * + * @write_wakeup: ``void ()(struct tty_struct *tty)`` + * + * This function is called by the low-level tty driver to signal that line + * discpline should try to send more characters to the low-level driver + * for transmission. If the line discpline does not have any more data to + * send, it can just return. If the line discipline does have some data to + * send, please arise a tasklet or workqueue to do the real data transfer. + * Do not send data in this hook, it may lead to a deadlock. + * + * @dcd_change: ``void ()(struct tty_struct *tty, unsigned int status)`` + * + * Tells the discipline that the DCD pin has changed its status. Used + * exclusively by the %N_PPS (Pulse-Per-Second) line discipline. + * + * @receive_buf2: ``int ()(struct tty_struct *tty, const unsigned char *cp, + * const char *fp, int count)`` + * + * This function is called by the low-level tty driver to send characters + * received by the hardware to the line discpline for processing. @cp is a + * pointer to the buffer of input character received by the device. @fp + * is a pointer to an array of flag bytes which indicate whether a + * character was received with a parity error, etc. @fp may be %NULL to + * indicate all data received is %TTY_NORMAL. If assigned, prefer this + * function for automatic flow control. + * + * @owner: module containting this ldisc (for reference counting) + * + * This structure defines the interface between the tty line discipline + * implementation and the tty routines. The above routines can be defined. + * Unless noted otherwise, they are optional, and can be filled in with a %NULL + * pointer. + */ struct tty_ldisc_ops { char *name; int num; @@ -183,8 +184,8 @@ struct tty_ldisc_ops { /* * The following routines are called from above. */ - int (*open)(struct tty_struct *); - void (*close)(struct tty_struct *); + int (*open)(struct tty_struct *tty); + void (*close)(struct tty_struct *tty); void (*flush_buffer)(struct tty_struct *tty); ssize_t (*read)(struct tty_struct *tty, struct file *file, unsigned char *buf, size_t nr, @@ -196,18 +197,18 @@ struct tty_ldisc_ops { int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); void (*set_termios)(struct tty_struct *tty, struct ktermios *old); - __poll_t (*poll)(struct tty_struct *, struct file *, - struct poll_table_struct *); + __poll_t (*poll)(struct tty_struct *tty, struct file *file, + struct poll_table_struct *wait); void (*hangup)(struct tty_struct *tty); /* * The following routines are called from below. */ - void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + void (*receive_buf)(struct tty_struct *tty, const unsigned char *cp, const char *fp, int count); - void (*write_wakeup)(struct tty_struct *); - void (*dcd_change)(struct tty_struct *, unsigned int); - int (*receive_buf2)(struct tty_struct *, const unsigned char *cp, + void (*write_wakeup)(struct tty_struct *tty); + void (*dcd_change)(struct tty_struct *tty, unsigned int status); + int (*receive_buf2)(struct tty_struct *tty, const unsigned char *cp, const char *fp, int count); struct module *owner; -- cgit v1.2.3 From 29d5ef685948369602ccd5c04d2a215449c4b943 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:55 +0100 Subject: tty: combine tty_operations triple docs into kernel-doc In Documentation/driver-api/serial/tty.rst, there are triplicated texts about some struct tty_operations' hooks. Combine them into existing kernel-doc comments of struct tty_operations and drop them from the Documentation/. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-8-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 5611992ab26a..41274d551e28 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -47,15 +47,17 @@ struct serial_struct; * routine is mandatory; if this routine is not filled in, the attempted * open will fail with %ENODEV. * - * Required method. Called with tty lock held. + * Required method. Called with tty lock held. May sleep. * * @close: ``void ()(struct tty_struct *tty, struct file *)`` * - * This routine is called when a particular @tty device is closed. + * This routine is called when a particular @tty device is closed. At the + * point of return from this call the driver must make no further ldisc + * calls of any kind. * * Remark: called even if the corresponding @open() failed. * - * Required method. Called with tty lock held. + * Required method. Called with tty lock held. May sleep. * * @shutdown: ``void ()(struct tty_struct *tty)`` * @@ -77,7 +79,10 @@ struct serial_struct; * user space or kernel space. This routine will return the * number of characters actually accepted for writing. * - * Optional: Required for writable devices. + * May occur in parallel in special cases. Because this includes panic + * paths drivers generally shouldn't try and do clever locking here. + * + * Optional: Required for writable devices. May not sleep. * * @put_char: ``int ()(struct tty_struct *tty, unsigned char ch)`` * @@ -105,6 +110,9 @@ struct serial_struct; * to change as output buffers get emptied, or if the output flow * control is acted. * + * The ldisc is responsible for being intelligent about multi-threading of + * write_room/write calls + * * Required if @write method is provided else not needed. Do not call this * function directly, call tty_write_room() * @@ -136,14 +144,21 @@ struct serial_struct; * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)`` * * This routine allows the @tty driver to be notified when device's - * termios settings have changed. + * termios settings have changed. New settings are in @tty->termios. + * Previous settings are passed in the @old argument. * - * Optional: Called under the @tty->termios_rwsem. + * The API is defined such that the driver should return the actual modes + * selected. This means that the driver is responsible for modifying any + * bits in @tty->termios it cannot fulfill to indicate the actual modes + * being used. + * + * Optional. Called under the @tty->termios_rwsem. May sleep. * * @set_ldisc: ``void ()(struct tty_struct *tty)`` * * This routine allows the @tty driver to be notified when the device's - * line discipline is being changed. + * line discipline is being changed. At the point this is done the + * discipline is not yet usable. * * Optional. Called under the @tty->ldisc_sem and @tty->termios_rwsem. * @@ -153,6 +168,9 @@ struct serial_struct; * discipline are close to full, and it should somehow signal that no more * characters should be sent to the @tty. * + * Serialization including with @unthrottle() is the job of the ldisc + * layer. + * * Optional: Always invoke via tty_throttle_safe(). Called under the * @tty->termios_rwsem. * @@ -204,7 +222,7 @@ struct serial_struct; * hardware is expected to do the delay work itself. 0 and -1 are still * used for on/off. * - * Optional: Required for %TCSBRK/%BRKP/etc. handling. + * Optional: Required for %TCSBRK/%BRKP/etc. handling. May sleep. * * @flush_buffer: ``void ()(struct tty_struct *tty)`` * @@ -222,7 +240,7 @@ struct serial_struct; * reached. * * Optional: If not provided, the device is assumed to have no FIFO. - * Usually correct to invoke via tty_wait_until_sent(). + * Usually correct to invoke via tty_wait_until_sent(). May sleep. * * @send_xchar: ``void ()(struct tty_struct *tty, char ch)`` * -- cgit v1.2.3 From 40f4268cddb93d17a11579920d940c2dca8b9445 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:56 +0100 Subject: tty: combine tty_ldisc_ops docs into kernel-doc In Documentation/driver-api/serial/tty.rst, there are duplicated texts about some struct tty_ldisc_ops' hooks. Combine them into existing kernel-doc comments of struct tty_ldisc_ops and drop them from the Documentation/. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-9-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_ldisc.h | 67 ++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index e0da0ba02de9..e85002b56752 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -61,33 +61,45 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * @name: name of this ldisc rendered in /proc/tty/ldiscs * @num: ``N_*`` number (%N_TTY, %N_HDLC, ...) reserved to this ldisc * - * @open: ``int ()(struct tty_struct *tty)`` + * @open: [TTY] ``int ()(struct tty_struct *tty)`` * * This function is called when the line discipline is associated with the - * @tty. The line discipline can use this as an opportunity to initialize - * any state needed by the ldisc routines. + * @tty. No other call into the line discipline for this tty will occur + * until it completes successfully. It should initialize any state needed + * by the ldisc, and set @tty->receive_room to the maximum amount of data + * the line discipline is willing to accept from the driver with a single + * call to @receive_buf(). Returning an error will prevent the ldisc from + * being attached. * - * @close: ``void ()(struct tty_struct *tty)`` + * Can sleep. + * + * @close: [TTY] ``void ()(struct tty_struct *tty)`` * * This function is called when the line discipline is being shutdown, * either because the @tty is being closed or because the @tty is being - * changed to use a new line discipline + * changed to use a new line discipline. At the point of execution no + * further users will enter the ldisc code for this tty. + * + * Can sleep. * - * @flush_buffer: ``void ()(struct tty_struct *tty)`` + * @flush_buffer: [TTY] ``void ()(struct tty_struct *tty)`` * * This function instructs the line discipline to clear its buffers of any * input characters it may have queued to be delivered to the user mode - * process. + * process. It may be called at any point between open and close. * - * @read: ``ssize_t ()(struct tty_struct *tty, struct file *file, + * @read: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, * unsigned char *buf, size_t nr)`` * * This function is called when the user requests to read from the @tty. * The line discipline will return whatever characters it has buffered up * for the user. If this function is not defined, the user will receive - * an %EIO error. + * an %EIO error. Multiple read calls may occur in parallel and the ldisc + * must deal with serialization issues. + * + * Can sleep. * - * @write: ``ssize_t ()(struct tty_struct *tty, struct file *file, + * @write: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, * const unsigned char *buf, size_t nr)`` * * This function is called when the user requests to write to the @tty. @@ -96,7 +108,9 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * characters first. If this function is not defined, the user will * receive an %EIO error. * - * @ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd, + * Can sleep. + * + * @ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd, * unsigned long arg)`` * * This function is called when the user requests an ioctl which is not @@ -106,7 +120,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * discpline. So a low-level driver can "grab" an ioctl request before * the line discpline has a chance to see it. * - * @compat_ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd, + * @compat_ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd, * unsigned long arg)`` * * Process ioctl calls from 32-bit process on 64-bit system. @@ -116,27 +130,29 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * a pointer to wordsize-sensitive structure belongs here, but most of * ldiscs will happily leave it %NULL. * - * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)`` + * @set_termios: [TTY] ``void ()(struct tty_struct *tty, struct ktermios *old)`` * * This function notifies the line discpline that a change has been made * to the termios structure. * - * @poll: ``int ()(struct tty_struct *tty, struct file *file, + * @poll: [TTY] ``int ()(struct tty_struct *tty, struct file *file, * struct poll_table_struct *wait)`` * * This function is called when a user attempts to select/poll on a @tty * device. It is solely the responsibility of the line discipline to * handle poll requests. * - * @hangup: ``void ()(struct tty_struct *tty)`` + * @hangup: [TTY] ``void ()(struct tty_struct *tty)`` * * Called on a hangup. Tells the discipline that it should cease I/O to - * the tty driver. Can sleep. The driver should seek to perform this - * action quickly but should wait until any pending driver I/O is - * completed. + * the tty driver. The driver should seek to perform this action quickly + * but should wait until any pending driver I/O is completed. No further + * calls into the ldisc code will occur. + * + * Can sleep. * - * @receive_buf: ``void ()(struct tty_struct *tty, const unsigned char *cp, - * const char *fp, int count)`` + * @receive_buf: [DRV] ``void ()(struct tty_struct *tty, + * const unsigned char *cp, const char *fp, int count)`` * * This function is called by the low-level tty driver to send characters * received by the hardware to the line discpline for processing. @cp is @@ -145,7 +161,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * character was received with a parity error, etc. @fp may be %NULL to * indicate all data received is %TTY_NORMAL. * - * @write_wakeup: ``void ()(struct tty_struct *tty)`` + * @write_wakeup: [DRV] ``void ()(struct tty_struct *tty)`` * * This function is called by the low-level tty driver to signal that line * discpline should try to send more characters to the low-level driver @@ -154,13 +170,13 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * send, please arise a tasklet or workqueue to do the real data transfer. * Do not send data in this hook, it may lead to a deadlock. * - * @dcd_change: ``void ()(struct tty_struct *tty, unsigned int status)`` + * @dcd_change: [DRV] ``void ()(struct tty_struct *tty, unsigned int status)`` * * Tells the discipline that the DCD pin has changed its status. Used * exclusively by the %N_PPS (Pulse-Per-Second) line discipline. * - * @receive_buf2: ``int ()(struct tty_struct *tty, const unsigned char *cp, - * const char *fp, int count)`` + * @receive_buf2: [DRV] ``int ()(struct tty_struct *tty, + * const unsigned char *cp, const char *fp, int count)`` * * This function is called by the low-level tty driver to send characters * received by the hardware to the line discpline for processing. @cp is a @@ -176,6 +192,9 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * implementation and the tty routines. The above routines can be defined. * Unless noted otherwise, they are optional, and can be filled in with a %NULL * pointer. + * + * Hooks marked [TTY] are invoked from the TTY core, the [DRV] ones from the + * tty_driver side. */ struct tty_ldisc_ops { char *name; -- cgit v1.2.3 From 4072254f96f954ec0d34899f15d987803b6d76a2 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:57 +0100 Subject: tty: reformat tty_struct::flags into kernel-doc Move the partial tty_struct::flags documentation from tty_ldisc to the tty.h header and combine it with the one-liners present there. Convert all those to kernel-doc. This way, we can simply reference the documentation in Documentation while the text is still along the definitions. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-10-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 74 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index da49ad9be281..7b0a5d478ef6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -263,26 +263,72 @@ struct tty_file_private { /* tty magic number */ #define TTY_MAGIC 0x5401 -/* - * These bits are used in the flags field of the tty structure. +/** + * DOC: TTY Struct Flags + * + * These bits are used in the :c:member:`tty_struct.flags` field. * * So that interrupts won't be able to mess up the queues, * copy_to_cooked must be atomic with respect to itself, as must * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. + * + * TTY_THROTTLED + * Driver input is throttled. The ldisc should call + * :c:member:`tty_driver.unthrottle()` in order to resume reception when + * it is ready to process more data (at threshold min). + * + * TTY_IO_ERROR + * If set, causes all subsequent userspace read/write calls on the tty to + * fail, returning -%EIO. (May be no ldisc too.) + * + * TTY_OTHER_CLOSED + * Device is a pty and the other side has closed. + * + * TTY_EXCLUSIVE + * Exclusive open mode (a single opener). + * + * TTY_DO_WRITE_WAKEUP + * If set, causes the driver to call the + * :c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume + * transmission when it can accept more data to transmit. + * + * TTY_LDISC_OPEN + * Indicates that a line discipline is open. For debugging purposes only. + * + * TTY_PTY_LOCK + * A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic. + * + * TTY_NO_WRITE_SPLIT + * Prevent driver from splitting up writes into smaller chunks (preserve + * write boundaries to driver). + * + * TTY_HUPPED + * The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`. + * + * TTY_HUPPING + * The TTY is in the process of hanging up to abort potential readers. + * + * TTY_LDISC_CHANGING + * Line discipline for this TTY is being changed. I/O should not block + * when this is set. Use tty_io_nonblock() to check. + * + * TTY_LDISC_HALTED + * Line discipline for this TTY was stopped. No work should be queued to + * this ldisc. */ -#define TTY_THROTTLED 0 /* Call unthrottle() at threshold min */ -#define TTY_IO_ERROR 1 /* Cause an I/O error (may be no ldisc too) */ -#define TTY_OTHER_CLOSED 2 /* Other side (if any) has closed */ -#define TTY_EXCLUSIVE 3 /* Exclusive open mode */ -#define TTY_DO_WRITE_WAKEUP 5 /* Call write_wakeup after queuing new */ -#define TTY_LDISC_OPEN 11 /* Line discipline is open */ -#define TTY_PTY_LOCK 16 /* pty private */ -#define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ -#define TTY_HUPPED 18 /* Post driver->hangup() */ -#define TTY_HUPPING 19 /* Hangup in progress */ -#define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ -#define TTY_LDISC_HALTED 22 /* Line discipline is halted */ +#define TTY_THROTTLED 0 +#define TTY_IO_ERROR 1 +#define TTY_OTHER_CLOSED 2 +#define TTY_EXCLUSIVE 3 +#define TTY_DO_WRITE_WAKEUP 5 +#define TTY_LDISC_OPEN 11 +#define TTY_PTY_LOCK 16 +#define TTY_NO_WRITE_SPLIT 17 +#define TTY_HUPPED 18 +#define TTY_HUPPING 19 +#define TTY_LDISC_CHANGING 20 +#define TTY_LDISC_HALTED 22 static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { -- cgit v1.2.3 From 34d809f8b4ff68f63e8d7f71d93d150382c6bb8b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 26 Nov 2021 09:15:58 +0100 Subject: tty: reformat TTY_DRIVER_ flags into kernel-doc We want to reference TTY_DRIVER_* flags in Documentation/ later in this series. But the current documentation in the TTY_DRIVER_*'s header does not allow that. Reformat it to kernel-doc using "DOC" directive and line-feeds, so that we can include it as it is. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20211126081611.11001-11-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 90 ++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 41274d551e28..4841d8069c07 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -492,49 +492,53 @@ static inline void tty_set_operations(struct tty_driver *driver, /* tty driver magic number */ #define TTY_DRIVER_MAGIC 0x5402 -/* - * tty driver flags - * - * TTY_DRIVER_RESET_TERMIOS --- requests the tty layer to reset the - * termios setting when the last process has closed the device. - * Used for PTY's, in particular. - * - * TTY_DRIVER_REAL_RAW --- if set, indicates that the driver will - * guarantee never to set any special character handling - * flags if ((IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || - * !INPCK)). That is, if there is no reason for the driver to - * send notifications of parity and break characters up to the - * line driver, it won't do so. This allows the line driver to - * optimize for this case if this flag is set. (Note that there - * is also a promise, if the above case is true, not to signal - * overruns, either.) - * - * TTY_DRIVER_DYNAMIC_DEV --- if set, the individual tty devices need - * to be registered with a call to tty_register_device() when the - * device is found in the system and unregistered with a call to - * tty_unregister_device() so the devices will be show up - * properly in sysfs. If not set, driver->num entries will be - * created by the tty core in sysfs when tty_register_driver() is - * called. This is to be used by drivers that have tty devices - * that can appear and disappear while the main tty driver is - * registered with the tty core. - * - * TTY_DRIVER_DEVPTS_MEM -- don't use the standard arrays, instead - * use dynamic memory keyed through the devpts filesystem. This - * is only applicable to the pty driver. - * - * TTY_DRIVER_HARDWARE_BREAK -- hardware handles break signals. Pass - * the requested timeout to the caller instead of using a simple - * on/off interface. - * - * TTY_DRIVER_DYNAMIC_ALLOC -- do not allocate structures which are - * needed per line for this driver as it would waste memory. - * The driver will take care. - * - * TTY_DRIVER_UNNUMBERED_NODE -- do not create numbered /dev nodes. In - * other words create /dev/ttyprintk and not /dev/ttyprintk0. - * Applicable only when a driver for a single tty device is - * being allocated. +/** + * DOC: TTY Driver Flags + * + * TTY_DRIVER_RESET_TERMIOS + * Requests the tty layer to reset the termios setting when the last + * process has closed the device. Used for PTYs, in particular. + * + * TTY_DRIVER_REAL_RAW + * Indicates that the driver will guarantee not to set any special + * character handling flags if this is set for the tty: + * + * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)`` + * + * That is, if there is no reason for the driver to + * send notifications of parity and break characters up to the line + * driver, it won't do so. This allows the line driver to optimize for + * this case if this flag is set. (Note that there is also a promise, if + * the above case is true, not to signal overruns, either.) + * + * TTY_DRIVER_DYNAMIC_DEV + * The individual tty devices need to be registered with a call to + * tty_register_device() when the device is found in the system and + * unregistered with a call to tty_unregister_device() so the devices will + * be show up properly in sysfs. If not set, all &tty_driver.num entries + * will be created by the tty core in sysfs when tty_register_driver() is + * called. This is to be used by drivers that have tty devices that can + * appear and disappear while the main tty driver is registered with the + * tty core. + * + * TTY_DRIVER_DEVPTS_MEM + * Don't use the standard arrays (&tty_driver.ttys and + * &tty_driver.termios), instead use dynamic memory keyed through the + * devpts filesystem. This is only applicable to the PTY driver. + * + * TTY_DRIVER_HARDWARE_BREAK + * Hardware handles break signals. Pass the requested timeout to the + * &tty_operations.break_ctl instead of using a simple on/off interface. + * + * TTY_DRIVER_DYNAMIC_ALLOC + * Do not allocate structures which are needed per line for this driver + * (&tty_driver.ports) as it would waste memory. The driver will take + * care. This is only applicable to the PTY driver. + * + * TTY_DRIVER_UNNUMBERED_NODE + * Do not create numbered ``/dev`` nodes. For example, create + * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a + * driver for a single tty device is being allocated. */ #define TTY_DRIVER_INSTALLED 0x0001 #define TTY_DRIVER_RESET_TERMIOS 0x0002 -- cgit v1.2.3 From df0e68c1e9945e2ee86d266ce45597bbd8299b06 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 17 Nov 2021 12:05:59 +0000 Subject: comedi: Move the main COMEDI headers Move the main COMEDI driver headers out of "drivers/comedi/" into new directory "include/linux/comedi/". These are "comedidev.h", "comedilib.h", "comedi_pci.h", "comedi_pcmcia.h", and "comedi_usb.h". Additionally, move the user-space API header "comedi.h" into "include/uapi/linux/" and add "WITH Linux-syscall-note" to its SPDX-License-Identifier. Update the "COMEDI DRIVERS" section of the MAINTAINERS file to account for these changes. Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20211117120604.117740-2-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedi_pci.h | 56 ++ include/linux/comedi/comedi_pcmcia.h | 48 ++ include/linux/comedi/comedi_usb.h | 41 + include/linux/comedi/comedidev.h | 1053 +++++++++++++++++++++++ include/linux/comedi/comedilib.h | 26 + include/uapi/linux/comedi.h | 1528 ++++++++++++++++++++++++++++++++++ 6 files changed, 2752 insertions(+) create mode 100644 include/linux/comedi/comedi_pci.h create mode 100644 include/linux/comedi/comedi_pcmcia.h create mode 100644 include/linux/comedi/comedi_usb.h create mode 100644 include/linux/comedi/comedidev.h create mode 100644 include/linux/comedi/comedilib.h create mode 100644 include/uapi/linux/comedi.h (limited to 'include') diff --git a/include/linux/comedi/comedi_pci.h b/include/linux/comedi/comedi_pci.h new file mode 100644 index 000000000000..2fb50663e3ed --- /dev/null +++ b/include/linux/comedi/comedi_pci.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedi_pci.h + * header file for Comedi PCI drivers + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1997-2000 David A. Schleef + */ + +#ifndef _COMEDI_PCI_H +#define _COMEDI_PCI_H + +#include +#include + +/* + * PCI Vendor IDs not in + */ +#define PCI_VENDOR_ID_KOLTER 0x1001 +#define PCI_VENDOR_ID_ICP 0x104c +#define PCI_VENDOR_ID_DT 0x1116 +#define PCI_VENDOR_ID_IOTECH 0x1616 +#define PCI_VENDOR_ID_CONTEC 0x1221 +#define PCI_VENDOR_ID_RTD 0x1435 +#define PCI_VENDOR_ID_HUMUSOFT 0x186c + +struct pci_dev *comedi_to_pci_dev(struct comedi_device *dev); + +int comedi_pci_enable(struct comedi_device *dev); +void comedi_pci_disable(struct comedi_device *dev); +void comedi_pci_detach(struct comedi_device *dev); + +int comedi_pci_auto_config(struct pci_dev *pcidev, struct comedi_driver *driver, + unsigned long context); +void comedi_pci_auto_unconfig(struct pci_dev *pcidev); + +int comedi_pci_driver_register(struct comedi_driver *comedi_driver, + struct pci_driver *pci_driver); +void comedi_pci_driver_unregister(struct comedi_driver *comedi_driver, + struct pci_driver *pci_driver); + +/** + * module_comedi_pci_driver() - Helper macro for registering a comedi PCI driver + * @__comedi_driver: comedi_driver struct + * @__pci_driver: pci_driver struct + * + * Helper macro for comedi PCI drivers which do not do anything special + * in module init/exit. This eliminates a lot of boilerplate. Each + * module may only use this macro once, and calling it replaces + * module_init() and module_exit() + */ +#define module_comedi_pci_driver(__comedi_driver, __pci_driver) \ + module_driver(__comedi_driver, comedi_pci_driver_register, \ + comedi_pci_driver_unregister, &(__pci_driver)) + +#endif /* _COMEDI_PCI_H */ diff --git a/include/linux/comedi/comedi_pcmcia.h b/include/linux/comedi/comedi_pcmcia.h new file mode 100644 index 000000000000..a33dfb65b869 --- /dev/null +++ b/include/linux/comedi/comedi_pcmcia.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedi_pcmcia.h + * header file for Comedi PCMCIA drivers + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1997-2000 David A. Schleef + */ + +#ifndef _COMEDI_PCMCIA_H +#define _COMEDI_PCMCIA_H + +#include +#include +#include + +struct pcmcia_device *comedi_to_pcmcia_dev(struct comedi_device *dev); + +int comedi_pcmcia_enable(struct comedi_device *dev, + int (*conf_check)(struct pcmcia_device *p_dev, + void *priv_data)); +void comedi_pcmcia_disable(struct comedi_device *dev); + +int comedi_pcmcia_auto_config(struct pcmcia_device *link, + struct comedi_driver *driver); +void comedi_pcmcia_auto_unconfig(struct pcmcia_device *link); + +int comedi_pcmcia_driver_register(struct comedi_driver *comedi_driver, + struct pcmcia_driver *pcmcia_driver); +void comedi_pcmcia_driver_unregister(struct comedi_driver *comedi_driver, + struct pcmcia_driver *pcmcia_driver); + +/** + * module_comedi_pcmcia_driver() - Helper macro for registering a comedi + * PCMCIA driver + * @__comedi_driver: comedi_driver struct + * @__pcmcia_driver: pcmcia_driver struct + * + * Helper macro for comedi PCMCIA drivers which do not do anything special + * in module init/exit. This eliminates a lot of boilerplate. Each + * module may only use this macro once, and calling it replaces + * module_init() and module_exit() + */ +#define module_comedi_pcmcia_driver(__comedi_driver, __pcmcia_driver) \ + module_driver(__comedi_driver, comedi_pcmcia_driver_register, \ + comedi_pcmcia_driver_unregister, &(__pcmcia_driver)) + +#endif /* _COMEDI_PCMCIA_H */ diff --git a/include/linux/comedi/comedi_usb.h b/include/linux/comedi/comedi_usb.h new file mode 100644 index 000000000000..5d17dd425bd2 --- /dev/null +++ b/include/linux/comedi/comedi_usb.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* comedi_usb.h + * header file for USB Comedi drivers + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1997-2000 David A. Schleef + */ + +#ifndef _COMEDI_USB_H +#define _COMEDI_USB_H + +#include +#include + +struct usb_interface *comedi_to_usb_interface(struct comedi_device *dev); +struct usb_device *comedi_to_usb_dev(struct comedi_device *dev); + +int comedi_usb_auto_config(struct usb_interface *intf, + struct comedi_driver *driver, unsigned long context); +void comedi_usb_auto_unconfig(struct usb_interface *intf); + +int comedi_usb_driver_register(struct comedi_driver *comedi_driver, + struct usb_driver *usb_driver); +void comedi_usb_driver_unregister(struct comedi_driver *comedi_driver, + struct usb_driver *usb_driver); + +/** + * module_comedi_usb_driver() - Helper macro for registering a comedi USB driver + * @__comedi_driver: comedi_driver struct + * @__usb_driver: usb_driver struct + * + * Helper macro for comedi USB drivers which do not do anything special + * in module init/exit. This eliminates a lot of boilerplate. Each + * module may only use this macro once, and calling it replaces + * module_init() and module_exit() + */ +#define module_comedi_usb_driver(__comedi_driver, __usb_driver) \ + module_driver(__comedi_driver, comedi_usb_driver_register, \ + comedi_usb_driver_unregister, &(__usb_driver)) + +#endif /* _COMEDI_USB_H */ diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h new file mode 100644 index 000000000000..0a1150900ef3 --- /dev/null +++ b/include/linux/comedi/comedidev.h @@ -0,0 +1,1053 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedidev.h + * header file for kernel-only structures, variables, and constants + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1997-2000 David A. Schleef + */ + +#ifndef _COMEDIDEV_H +#define _COMEDIDEV_H + +#include +#include +#include +#include +#include +#include + +#define COMEDI_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c)) +#define COMEDI_VERSION_CODE COMEDI_VERSION(COMEDI_MAJORVERSION, \ + COMEDI_MINORVERSION, COMEDI_MICROVERSION) +#define COMEDI_RELEASE VERSION + +#define COMEDI_NUM_BOARD_MINORS 0x30 + +/** + * struct comedi_subdevice - Working data for a COMEDI subdevice + * @device: COMEDI device to which this subdevice belongs. (Initialized by + * comedi_alloc_subdevices().) + * @index: Index of this subdevice within device's array of subdevices. + * (Initialized by comedi_alloc_subdevices().) + * @type: Type of subdevice from &enum comedi_subdevice_type. (Initialized by + * the low-level driver.) + * @n_chan: Number of channels the subdevice supports. (Initialized by the + * low-level driver.) + * @subdev_flags: Various "SDF" flags indicating aspects of the subdevice to + * the COMEDI core and user application. (Initialized by the low-level + * driver.) + * @len_chanlist: Maximum length of a channel list if the subdevice supports + * asynchronous acquisition commands. (Optionally initialized by the + * low-level driver, or changed from 0 to 1 during post-configuration.) + * @private: Private data pointer which is either set by the low-level driver + * itself, or by a call to comedi_alloc_spriv() which allocates storage. + * In the latter case, the storage is automatically freed after the + * low-level driver's "detach" handler is called for the device. + * (Initialized by the low-level driver.) + * @async: Pointer to &struct comedi_async id the subdevice supports + * asynchronous acquisition commands. (Allocated and initialized during + * post-configuration if needed.) + * @lock: Pointer to a file object that performed a %COMEDI_LOCK ioctl on the + * subdevice. (Initially NULL.) + * @busy: Pointer to a file object that is performing an asynchronous + * acquisition command on the subdevice. (Initially NULL.) + * @runflags: Internal flags for use by COMEDI core, mostly indicating whether + * an asynchronous acquisition command is running. + * @spin_lock: Generic spin-lock for use by the COMEDI core and the low-level + * driver. (Initialized by comedi_alloc_subdevices().) + * @io_bits: Bit-mask indicating the channel directions for a DIO subdevice + * with no more than 32 channels. A '1' at a bit position indicates the + * corresponding channel is configured as an output. (Initialized by the + * low-level driver for a DIO subdevice. Forced to all-outputs during + * post-configuration for a digital output subdevice.) + * @maxdata: If non-zero, this is the maximum raw data value of each channel. + * If zero, the maximum data value is channel-specific. (Initialized by + * the low-level driver.) + * @maxdata_list: If the maximum data value is channel-specific, this points + * to an array of maximum data values indexed by channel index. + * (Initialized by the low-level driver.) + * @range_table: If non-NULL, this points to a COMEDI range table for the + * subdevice. If NULL, the range table is channel-specific. (Initialized + * by the low-level driver, will be set to an "invalid" range table during + * post-configuration if @range_table and @range_table_list are both + * NULL.) + * @range_table_list: If the COMEDI range table is channel-specific, this + * points to an array of pointers to COMEDI range tables indexed by + * channel number. (Initialized by the low-level driver.) + * @chanlist: Not used. + * @insn_read: Optional pointer to a handler for the %INSN_READ instruction. + * (Initialized by the low-level driver, or set to a default handler + * during post-configuration.) + * @insn_write: Optional pointer to a handler for the %INSN_WRITE instruction. + * (Initialized by the low-level driver, or set to a default handler + * during post-configuration.) + * @insn_bits: Optional pointer to a handler for the %INSN_BITS instruction + * for a digital input, digital output or digital input/output subdevice. + * (Initialized by the low-level driver, or set to a default handler + * during post-configuration.) + * @insn_config: Optional pointer to a handler for the %INSN_CONFIG + * instruction. (Initialized by the low-level driver, or set to a default + * handler during post-configuration.) + * @do_cmd: If the subdevice supports asynchronous acquisition commands, this + * points to a handler to set it up in hardware. (Initialized by the + * low-level driver.) + * @do_cmdtest: If the subdevice supports asynchronous acquisition commands, + * this points to a handler used to check and possibly tweak a prospective + * acquisition command without setting it up in hardware. (Initialized by + * the low-level driver.) + * @poll: If the subdevice supports asynchronous acquisition commands, this + * is an optional pointer to a handler for the %COMEDI_POLL ioctl which + * instructs the low-level driver to synchronize buffers. (Initialized by + * the low-level driver if needed.) + * @cancel: If the subdevice supports asynchronous acquisition commands, this + * points to a handler used to terminate a running command. (Initialized + * by the low-level driver.) + * @buf_change: If the subdevice supports asynchronous acquisition commands, + * this is an optional pointer to a handler that is called when the data + * buffer for handling asynchronous commands is allocated or reallocated. + * (Initialized by the low-level driver if needed.) + * @munge: If the subdevice supports asynchronous acquisition commands and + * uses DMA to transfer data from the hardware to the acquisition buffer, + * this points to a function used to "munge" the data values from the + * hardware into the format expected by COMEDI. (Initialized by the + * low-level driver if needed.) + * @async_dma_dir: If the subdevice supports asynchronous acquisition commands + * and uses DMA to transfer data from the hardware to the acquisition + * buffer, this sets the DMA direction for the buffer. (initialized to + * %DMA_NONE by comedi_alloc_subdevices() and changed by the low-level + * driver if necessary.) + * @state: Handy bit-mask indicating the output states for a DIO or digital + * output subdevice with no more than 32 channels. (Initialized by the + * low-level driver.) + * @class_dev: If the subdevice supports asynchronous acquisition commands, + * this points to a sysfs comediX_subdY device where X is the minor device + * number of the COMEDI device and Y is the subdevice number. The minor + * device number for the sysfs device is allocated dynamically in the + * range 48 to 255. This is used to allow the COMEDI device to be opened + * with a different default read or write subdevice. (Allocated during + * post-configuration if needed.) + * @minor: If @class_dev is set, this is its dynamically allocated minor + * device number. (Set during post-configuration if necessary.) + * @readback: Optional pointer to memory allocated by + * comedi_alloc_subdev_readback() used to hold the values written to + * analog output channels so they can be read back. The storage is + * automatically freed after the low-level driver's "detach" handler is + * called for the device. (Initialized by the low-level driver.) + * + * This is the main control structure for a COMEDI subdevice. If the subdevice + * supports asynchronous acquisition commands, additional information is stored + * in the &struct comedi_async pointed to by @async. + * + * Most of the subdevice is initialized by the low-level driver's "attach" or + * "auto_attach" handlers but parts of it are initialized by + * comedi_alloc_subdevices(), and other parts are initialized during + * post-configuration on return from that handler. + * + * A low-level driver that sets @insn_bits for a digital input, digital output, + * or DIO subdevice may leave @insn_read and @insn_write uninitialized, in + * which case they will be set to a default handler during post-configuration + * that uses @insn_bits to emulate the %INSN_READ and %INSN_WRITE instructions. + */ +struct comedi_subdevice { + struct comedi_device *device; + int index; + int type; + int n_chan; + int subdev_flags; + int len_chanlist; /* maximum length of channel/gain list */ + + void *private; + + struct comedi_async *async; + + void *lock; + void *busy; + unsigned int runflags; + spinlock_t spin_lock; /* generic spin-lock for COMEDI and drivers */ + + unsigned int io_bits; + + unsigned int maxdata; /* if maxdata==0, use list */ + const unsigned int *maxdata_list; /* list is channel specific */ + + const struct comedi_lrange *range_table; + const struct comedi_lrange *const *range_table_list; + + unsigned int *chanlist; /* driver-owned chanlist (not used) */ + + int (*insn_read)(struct comedi_device *dev, struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); + int (*insn_write)(struct comedi_device *dev, struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); + int (*insn_bits)(struct comedi_device *dev, struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); + int (*insn_config)(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_insn *insn, + unsigned int *data); + + int (*do_cmd)(struct comedi_device *dev, struct comedi_subdevice *s); + int (*do_cmdtest)(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_cmd *cmd); + int (*poll)(struct comedi_device *dev, struct comedi_subdevice *s); + int (*cancel)(struct comedi_device *dev, struct comedi_subdevice *s); + + /* called when the buffer changes */ + int (*buf_change)(struct comedi_device *dev, + struct comedi_subdevice *s); + + void (*munge)(struct comedi_device *dev, struct comedi_subdevice *s, + void *data, unsigned int num_bytes, + unsigned int start_chan_index); + enum dma_data_direction async_dma_dir; + + unsigned int state; + + struct device *class_dev; + int minor; + + unsigned int *readback; +}; + +/** + * struct comedi_buf_page - Describe a page of a COMEDI buffer + * @virt_addr: Kernel address of page. + * @dma_addr: DMA address of page if in DMA coherent memory. + */ +struct comedi_buf_page { + void *virt_addr; + dma_addr_t dma_addr; +}; + +/** + * struct comedi_buf_map - Describe pages in a COMEDI buffer + * @dma_hw_dev: Low-level hardware &struct device pointer copied from the + * COMEDI device's hw_dev member. + * @page_list: Pointer to array of &struct comedi_buf_page, one for each + * page in the buffer. + * @n_pages: Number of pages in the buffer. + * @dma_dir: DMA direction used to allocate pages of DMA coherent memory, + * or %DMA_NONE if pages allocated from regular memory. + * @refcount: &struct kref reference counter used to free the buffer. + * + * A COMEDI data buffer is allocated as individual pages, either in + * conventional memory or DMA coherent memory, depending on the attached, + * low-level hardware device. (The buffer pages also get mapped into the + * kernel's contiguous virtual address space pointed to by the 'prealloc_buf' + * member of &struct comedi_async.) + * + * The buffer is normally freed when the COMEDI device is detached from the + * low-level driver (which may happen due to device removal), but if it happens + * to be mmapped at the time, the pages cannot be freed until the buffer has + * been munmapped. That is what the reference counter is for. (The virtual + * address space pointed by 'prealloc_buf' is freed when the COMEDI device is + * detached.) + */ +struct comedi_buf_map { + struct device *dma_hw_dev; + struct comedi_buf_page *page_list; + unsigned int n_pages; + enum dma_data_direction dma_dir; + struct kref refcount; +}; + +/** + * struct comedi_async - Control data for asynchronous COMEDI commands + * @prealloc_buf: Kernel virtual address of allocated acquisition buffer. + * @prealloc_bufsz: Buffer size (in bytes). + * @buf_map: Map of buffer pages. + * @max_bufsize: Maximum allowed buffer size (in bytes). + * @buf_write_count: "Write completed" count (in bytes, modulo 2**32). + * @buf_write_alloc_count: "Allocated for writing" count (in bytes, + * modulo 2**32). + * @buf_read_count: "Read completed" count (in bytes, modulo 2**32). + * @buf_read_alloc_count: "Allocated for reading" count (in bytes, + * modulo 2**32). + * @buf_write_ptr: Buffer position for writer. + * @buf_read_ptr: Buffer position for reader. + * @cur_chan: Current position in chanlist for scan (for those drivers that + * use it). + * @scans_done: The number of scans completed. + * @scan_progress: Amount received or sent for current scan (in bytes). + * @munge_chan: Current position in chanlist for "munging". + * @munge_count: "Munge" count (in bytes, modulo 2**32). + * @munge_ptr: Buffer position for "munging". + * @events: Bit-vector of events that have occurred. + * @cmd: Details of comedi command in progress. + * @wait_head: Task wait queue for file reader or writer. + * @cb_mask: Bit-vector of events that should wake waiting tasks. + * @inttrig: Software trigger function for command, or NULL. + * + * Note about the ..._count and ..._ptr members: + * + * Think of the _Count values being integers of unlimited size, indexing + * into a buffer of infinite length (though only an advancing portion + * of the buffer of fixed length prealloc_bufsz is accessible at any + * time). Then: + * + * Buf_Read_Count <= Buf_Read_Alloc_Count <= Munge_Count <= + * Buf_Write_Count <= Buf_Write_Alloc_Count <= + * (Buf_Read_Count + prealloc_bufsz) + * + * (Those aren't the actual members, apart from prealloc_bufsz.) When the + * buffer is reset, those _Count values start at 0 and only increase in value, + * maintaining the above inequalities until the next time the buffer is + * reset. The buffer is divided into the following regions by the inequalities: + * + * [0, Buf_Read_Count): + * old region no longer accessible + * + * [Buf_Read_Count, Buf_Read_Alloc_Count): + * filled and munged region allocated for reading but not yet read + * + * [Buf_Read_Alloc_Count, Munge_Count): + * filled and munged region not yet allocated for reading + * + * [Munge_Count, Buf_Write_Count): + * filled region not yet munged + * + * [Buf_Write_Count, Buf_Write_Alloc_Count): + * unfilled region allocated for writing but not yet written + * + * [Buf_Write_Alloc_Count, Buf_Read_Count + prealloc_bufsz): + * unfilled region not yet allocated for writing + * + * [Buf_Read_Count + prealloc_bufsz, infinity): + * unfilled region not yet accessible + * + * Data needs to be written into the buffer before it can be read out, + * and may need to be converted (or "munged") between the two + * operations. Extra unfilled buffer space may need to allocated for + * writing (advancing Buf_Write_Alloc_Count) before new data is written. + * After writing new data, the newly filled space needs to be released + * (advancing Buf_Write_Count). This also results in the new data being + * "munged" (advancing Munge_Count). Before data is read out of the + * buffer, extra space may need to be allocated for reading (advancing + * Buf_Read_Alloc_Count). After the data has been read out, the space + * needs to be released (advancing Buf_Read_Count). + * + * The actual members, buf_read_count, buf_read_alloc_count, + * munge_count, buf_write_count, and buf_write_alloc_count take the + * value of the corresponding capitalized _Count values modulo 2^32 + * (UINT_MAX+1). Subtracting a "higher" _count value from a "lower" + * _count value gives the same answer as subtracting a "higher" _Count + * value from a lower _Count value because prealloc_bufsz < UINT_MAX+1. + * The modulo operation is done implicitly. + * + * The buf_read_ptr, munge_ptr, and buf_write_ptr members take the value + * of the corresponding capitalized _Count values modulo prealloc_bufsz. + * These correspond to byte indices in the physical buffer. The modulo + * operation is done by subtracting prealloc_bufsz when the value + * exceeds prealloc_bufsz (assuming prealloc_bufsz plus the increment is + * less than or equal to UINT_MAX). + */ +struct comedi_async { + void *prealloc_buf; + unsigned int prealloc_bufsz; + struct comedi_buf_map *buf_map; + unsigned int max_bufsize; + unsigned int buf_write_count; + unsigned int buf_write_alloc_count; + unsigned int buf_read_count; + unsigned int buf_read_alloc_count; + unsigned int buf_write_ptr; + unsigned int buf_read_ptr; + unsigned int cur_chan; + unsigned int scans_done; + unsigned int scan_progress; + unsigned int munge_chan; + unsigned int munge_count; + unsigned int munge_ptr; + unsigned int events; + struct comedi_cmd cmd; + wait_queue_head_t wait_head; + unsigned int cb_mask; + int (*inttrig)(struct comedi_device *dev, struct comedi_subdevice *s, + unsigned int x); +}; + +/** + * enum comedi_cb - &struct comedi_async callback "events" + * @COMEDI_CB_EOS: end-of-scan + * @COMEDI_CB_EOA: end-of-acquisition/output + * @COMEDI_CB_BLOCK: data has arrived, wakes up read() / write() + * @COMEDI_CB_EOBUF: DEPRECATED: end of buffer + * @COMEDI_CB_ERROR: card error during acquisition + * @COMEDI_CB_OVERFLOW: buffer overflow/underflow + * @COMEDI_CB_ERROR_MASK: events that indicate an error has occurred + * @COMEDI_CB_CANCEL_MASK: events that will cancel an async command + */ +enum comedi_cb { + COMEDI_CB_EOS = BIT(0), + COMEDI_CB_EOA = BIT(1), + COMEDI_CB_BLOCK = BIT(2), + COMEDI_CB_EOBUF = BIT(3), + COMEDI_CB_ERROR = BIT(4), + COMEDI_CB_OVERFLOW = BIT(5), + /* masks */ + COMEDI_CB_ERROR_MASK = (COMEDI_CB_ERROR | COMEDI_CB_OVERFLOW), + COMEDI_CB_CANCEL_MASK = (COMEDI_CB_EOA | COMEDI_CB_ERROR_MASK) +}; + +/** + * struct comedi_driver - COMEDI driver registration + * @driver_name: Name of driver. + * @module: Owning module. + * @attach: The optional "attach" handler for manually configured COMEDI + * devices. + * @detach: The "detach" handler for deconfiguring COMEDI devices. + * @auto_attach: The optional "auto_attach" handler for automatically + * configured COMEDI devices. + * @num_names: Optional number of "board names" supported. + * @board_name: Optional pointer to a pointer to a board name. The pointer + * to a board name is embedded in an element of a driver-defined array + * of static, read-only board type information. + * @offset: Optional size of each element of the driver-defined array of + * static, read-only board type information, i.e. the offset between each + * pointer to a board name. + * + * This is used with comedi_driver_register() and comedi_driver_unregister() to + * register and unregister a low-level COMEDI driver with the COMEDI core. + * + * If @num_names is non-zero, @board_name should be non-NULL, and @offset + * should be at least sizeof(*board_name). These are used by the handler for + * the %COMEDI_DEVCONFIG ioctl to match a hardware device and its driver by + * board name. If @num_names is zero, the %COMEDI_DEVCONFIG ioctl matches a + * hardware device and its driver by driver name. This is only useful if the + * @attach handler is set. If @num_names is non-zero, the driver's @attach + * handler will be called with the COMEDI device structure's board_ptr member + * pointing to the matched pointer to a board name within the driver's private + * array of static, read-only board type information. + * + * The @detach handler has two roles. If a COMEDI device was successfully + * configured by the @attach or @auto_attach handler, it is called when the + * device is being deconfigured (by the %COMEDI_DEVCONFIG ioctl, or due to + * unloading of the driver, or due to device removal). It is also called when + * the @attach or @auto_attach handler returns an error. Therefore, the + * @attach or @auto_attach handlers can defer clean-up on error until the + * @detach handler is called. If the @attach or @auto_attach handlers free + * any resources themselves, they must prevent the @detach handler from + * freeing the same resources. The @detach handler must not assume that all + * resources requested by the @attach or @auto_attach handler were + * successfully allocated. + */ +struct comedi_driver { + /* private: */ + struct comedi_driver *next; /* Next in list of COMEDI drivers. */ + /* public: */ + const char *driver_name; + struct module *module; + int (*attach)(struct comedi_device *dev, struct comedi_devconfig *it); + void (*detach)(struct comedi_device *dev); + int (*auto_attach)(struct comedi_device *dev, unsigned long context); + unsigned int num_names; + const char *const *board_name; + int offset; +}; + +/** + * struct comedi_device - Working data for a COMEDI device + * @use_count: Number of open file objects. + * @driver: Low-level COMEDI driver attached to this COMEDI device. + * @pacer: Optional pointer to a dynamically allocated acquisition pacer + * control. It is freed automatically after the COMEDI device is + * detached from the low-level driver. + * @private: Optional pointer to private data allocated by the low-level + * driver. It is freed automatically after the COMEDI device is + * detached from the low-level driver. + * @class_dev: Sysfs comediX device. + * @minor: Minor device number of COMEDI char device (0-47). + * @detach_count: Counter incremented every time the COMEDI device is detached. + * Used for checking a previous attachment is still valid. + * @hw_dev: Optional pointer to the low-level hardware &struct device. It is + * required for automatically configured COMEDI devices and optional for + * COMEDI devices configured by the %COMEDI_DEVCONFIG ioctl, although + * the bus-specific COMEDI functions only work if it is set correctly. + * It is also passed to dma_alloc_coherent() for COMEDI subdevices that + * have their 'async_dma_dir' member set to something other than + * %DMA_NONE. + * @board_name: Pointer to a COMEDI board name or a COMEDI driver name. When + * the low-level driver's "attach" handler is called by the handler for + * the %COMEDI_DEVCONFIG ioctl, it either points to a matched board name + * string if the 'num_names' member of the &struct comedi_driver is + * non-zero, otherwise it points to the low-level driver name string. + * When the low-lever driver's "auto_attach" handler is called for an + * automatically configured COMEDI device, it points to the low-level + * driver name string. The low-level driver is free to change it in its + * "attach" or "auto_attach" handler if it wishes. + * @board_ptr: Optional pointer to private, read-only board type information in + * the low-level driver. If the 'num_names' member of the &struct + * comedi_driver is non-zero, the handler for the %COMEDI_DEVCONFIG ioctl + * will point it to a pointer to a matched board name string within the + * driver's private array of static, read-only board type information when + * calling the driver's "attach" handler. The low-level driver is free to + * change it. + * @attached: Flag indicating that the COMEDI device is attached to a low-level + * driver. + * @ioenabled: Flag used to indicate that a PCI device has been enabled and + * its regions requested. + * @spinlock: Generic spin-lock for use by the low-level driver. + * @mutex: Generic mutex for use by the COMEDI core module. + * @attach_lock: &struct rw_semaphore used to guard against the COMEDI device + * being detached while an operation is in progress. The down_write() + * operation is only allowed while @mutex is held and is used when + * changing @attached and @detach_count and calling the low-level driver's + * "detach" handler. The down_read() operation is generally used without + * holding @mutex. + * @refcount: &struct kref reference counter for freeing COMEDI device. + * @n_subdevices: Number of COMEDI subdevices allocated by the low-level + * driver for this device. + * @subdevices: Dynamically allocated array of COMEDI subdevices. + * @mmio: Optional pointer to a remapped MMIO region set by the low-level + * driver. + * @iobase: Optional base of an I/O port region requested by the low-level + * driver. + * @iolen: Length of I/O port region requested at @iobase. + * @irq: Optional IRQ number requested by the low-level driver. + * @read_subdev: Optional pointer to a default COMEDI subdevice operated on by + * the read() file operation. Set by the low-level driver. + * @write_subdev: Optional pointer to a default COMEDI subdevice operated on by + * the write() file operation. Set by the low-level driver. + * @async_queue: Storage for fasync_helper(). + * @open: Optional pointer to a function set by the low-level driver to be + * called when @use_count changes from 0 to 1. + * @close: Optional pointer to a function set by the low-level driver to be + * called when @use_count changed from 1 to 0. + * @insn_device_config: Optional pointer to a handler for all sub-instructions + * except %INSN_DEVICE_CONFIG_GET_ROUTES of the %INSN_DEVICE_CONFIG + * instruction. If this is not initialized by the low-level driver, a + * default handler will be set during post-configuration. + * @get_valid_routes: Optional pointer to a handler for the + * %INSN_DEVICE_CONFIG_GET_ROUTES sub-instruction of the + * %INSN_DEVICE_CONFIG instruction set. If this is not initialized by the + * low-level driver, a default handler that copies zero routes back to the + * user will be used. + * + * This is the main control data structure for a COMEDI device (as far as the + * COMEDI core is concerned). There are two groups of COMEDI devices - + * "legacy" devices that are configured by the handler for the + * %COMEDI_DEVCONFIG ioctl, and automatically configured devices resulting + * from a call to comedi_auto_config() as a result of a bus driver probe in + * a low-level COMEDI driver. The "legacy" COMEDI devices are allocated + * during module initialization if the "comedi_num_legacy_minors" module + * parameter is non-zero and use minor device numbers from 0 to + * comedi_num_legacy_minors minus one. The automatically configured COMEDI + * devices are allocated on demand and use minor device numbers from + * comedi_num_legacy_minors to 47. + */ +struct comedi_device { + int use_count; + struct comedi_driver *driver; + struct comedi_8254 *pacer; + void *private; + + struct device *class_dev; + int minor; + unsigned int detach_count; + struct device *hw_dev; + + const char *board_name; + const void *board_ptr; + unsigned int attached:1; + unsigned int ioenabled:1; + spinlock_t spinlock; /* generic spin-lock for low-level driver */ + struct mutex mutex; /* generic mutex for COMEDI core */ + struct rw_semaphore attach_lock; + struct kref refcount; + + int n_subdevices; + struct comedi_subdevice *subdevices; + + /* dumb */ + void __iomem *mmio; + unsigned long iobase; + unsigned long iolen; + unsigned int irq; + + struct comedi_subdevice *read_subdev; + struct comedi_subdevice *write_subdev; + + struct fasync_struct *async_queue; + + int (*open)(struct comedi_device *dev); + void (*close)(struct comedi_device *dev); + int (*insn_device_config)(struct comedi_device *dev, + struct comedi_insn *insn, unsigned int *data); + unsigned int (*get_valid_routes)(struct comedi_device *dev, + unsigned int n_pairs, + unsigned int *pair_data); +}; + +/* + * function prototypes + */ + +void comedi_event(struct comedi_device *dev, struct comedi_subdevice *s); + +struct comedi_device *comedi_dev_get_from_minor(unsigned int minor); +int comedi_dev_put(struct comedi_device *dev); + +bool comedi_is_subdevice_running(struct comedi_subdevice *s); + +void *comedi_alloc_spriv(struct comedi_subdevice *s, size_t size); +void comedi_set_spriv_auto_free(struct comedi_subdevice *s); + +int comedi_check_chanlist(struct comedi_subdevice *s, + int n, + unsigned int *chanlist); + +/* range stuff */ + +#define RANGE(a, b) {(a) * 1e6, (b) * 1e6, 0} +#define RANGE_ext(a, b) {(a) * 1e6, (b) * 1e6, RF_EXTERNAL} +#define RANGE_mA(a, b) {(a) * 1e6, (b) * 1e6, UNIT_mA} +#define RANGE_unitless(a, b) {(a) * 1e6, (b) * 1e6, 0} +#define BIP_RANGE(a) {-(a) * 1e6, (a) * 1e6, 0} +#define UNI_RANGE(a) {0, (a) * 1e6, 0} + +extern const struct comedi_lrange range_bipolar10; +extern const struct comedi_lrange range_bipolar5; +extern const struct comedi_lrange range_bipolar2_5; +extern const struct comedi_lrange range_unipolar10; +extern const struct comedi_lrange range_unipolar5; +extern const struct comedi_lrange range_unipolar2_5; +extern const struct comedi_lrange range_0_20mA; +extern const struct comedi_lrange range_4_20mA; +extern const struct comedi_lrange range_0_32mA; +extern const struct comedi_lrange range_unknown; + +#define range_digital range_unipolar5 + +/** + * struct comedi_lrange - Describes a COMEDI range table + * @length: Number of entries in the range table. + * @range: Array of &struct comedi_krange, one for each range. + * + * Each element of @range[] describes the minimum and maximum physical range + * and the type of units. Typically, the type of unit is %UNIT_volt + * (i.e. volts) and the minimum and maximum are in millionths of a volt. + * There may also be a flag that indicates the minimum and maximum are merely + * scale factors for an unknown, external reference. + */ +struct comedi_lrange { + int length; + struct comedi_krange range[]; +}; + +/** + * comedi_range_is_bipolar() - Test if subdevice range is bipolar + * @s: COMEDI subdevice. + * @range: Index of range within a range table. + * + * Tests whether a range is bipolar by checking whether its minimum value + * is negative. + * + * Assumes @range is valid. Does not work for subdevices using a + * channel-specific range table list. + * + * Return: + * %true if the range is bipolar. + * %false if the range is unipolar. + */ +static inline bool comedi_range_is_bipolar(struct comedi_subdevice *s, + unsigned int range) +{ + return s->range_table->range[range].min < 0; +} + +/** + * comedi_range_is_unipolar() - Test if subdevice range is unipolar + * @s: COMEDI subdevice. + * @range: Index of range within a range table. + * + * Tests whether a range is unipolar by checking whether its minimum value + * is at least 0. + * + * Assumes @range is valid. Does not work for subdevices using a + * channel-specific range table list. + * + * Return: + * %true if the range is unipolar. + * %false if the range is bipolar. + */ +static inline bool comedi_range_is_unipolar(struct comedi_subdevice *s, + unsigned int range) +{ + return s->range_table->range[range].min >= 0; +} + +/** + * comedi_range_is_external() - Test if subdevice range is external + * @s: COMEDI subdevice. + * @range: Index of range within a range table. + * + * Tests whether a range is externally reference by checking whether its + * %RF_EXTERNAL flag is set. + * + * Assumes @range is valid. Does not work for subdevices using a + * channel-specific range table list. + * + * Return: + * %true if the range is external. + * %false if the range is internal. + */ +static inline bool comedi_range_is_external(struct comedi_subdevice *s, + unsigned int range) +{ + return !!(s->range_table->range[range].flags & RF_EXTERNAL); +} + +/** + * comedi_chan_range_is_bipolar() - Test if channel-specific range is bipolar + * @s: COMEDI subdevice. + * @chan: The channel number. + * @range: Index of range within a range table. + * + * Tests whether a range is bipolar by checking whether its minimum value + * is negative. + * + * Assumes @chan and @range are valid. Only works for subdevices with a + * channel-specific range table list. + * + * Return: + * %true if the range is bipolar. + * %false if the range is unipolar. + */ +static inline bool comedi_chan_range_is_bipolar(struct comedi_subdevice *s, + unsigned int chan, + unsigned int range) +{ + return s->range_table_list[chan]->range[range].min < 0; +} + +/** + * comedi_chan_range_is_unipolar() - Test if channel-specific range is unipolar + * @s: COMEDI subdevice. + * @chan: The channel number. + * @range: Index of range within a range table. + * + * Tests whether a range is unipolar by checking whether its minimum value + * is at least 0. + * + * Assumes @chan and @range are valid. Only works for subdevices with a + * channel-specific range table list. + * + * Return: + * %true if the range is unipolar. + * %false if the range is bipolar. + */ +static inline bool comedi_chan_range_is_unipolar(struct comedi_subdevice *s, + unsigned int chan, + unsigned int range) +{ + return s->range_table_list[chan]->range[range].min >= 0; +} + +/** + * comedi_chan_range_is_external() - Test if channel-specific range is external + * @s: COMEDI subdevice. + * @chan: The channel number. + * @range: Index of range within a range table. + * + * Tests whether a range is externally reference by checking whether its + * %RF_EXTERNAL flag is set. + * + * Assumes @chan and @range are valid. Only works for subdevices with a + * channel-specific range table list. + * + * Return: + * %true if the range is bipolar. + * %false if the range is unipolar. + */ +static inline bool comedi_chan_range_is_external(struct comedi_subdevice *s, + unsigned int chan, + unsigned int range) +{ + return !!(s->range_table_list[chan]->range[range].flags & RF_EXTERNAL); +} + +/** + * comedi_offset_munge() - Convert between offset binary and 2's complement + * @s: COMEDI subdevice. + * @val: Value to be converted. + * + * Toggles the highest bit of a sample value to toggle between offset binary + * and 2's complement. Assumes that @s->maxdata is a power of 2 minus 1. + * + * Return: The converted value. + */ +static inline unsigned int comedi_offset_munge(struct comedi_subdevice *s, + unsigned int val) +{ + return val ^ s->maxdata ^ (s->maxdata >> 1); +} + +/** + * comedi_bytes_per_sample() - Determine subdevice sample size + * @s: COMEDI subdevice. + * + * The sample size will be 4 (sizeof int) or 2 (sizeof short) depending on + * whether the %SDF_LSAMPL subdevice flag is set or not. + * + * Return: The subdevice sample size. + */ +static inline unsigned int comedi_bytes_per_sample(struct comedi_subdevice *s) +{ + return s->subdev_flags & SDF_LSAMPL ? sizeof(int) : sizeof(short); +} + +/** + * comedi_sample_shift() - Determine log2 of subdevice sample size + * @s: COMEDI subdevice. + * + * The sample size will be 4 (sizeof int) or 2 (sizeof short) depending on + * whether the %SDF_LSAMPL subdevice flag is set or not. The log2 of the + * sample size will be 2 or 1 and can be used as the right operand of a + * bit-shift operator to multiply or divide something by the sample size. + * + * Return: log2 of the subdevice sample size. + */ +static inline unsigned int comedi_sample_shift(struct comedi_subdevice *s) +{ + return s->subdev_flags & SDF_LSAMPL ? 2 : 1; +} + +/** + * comedi_bytes_to_samples() - Convert a number of bytes to a number of samples + * @s: COMEDI subdevice. + * @nbytes: Number of bytes + * + * Return: The number of bytes divided by the subdevice sample size. + */ +static inline unsigned int comedi_bytes_to_samples(struct comedi_subdevice *s, + unsigned int nbytes) +{ + return nbytes >> comedi_sample_shift(s); +} + +/** + * comedi_samples_to_bytes() - Convert a number of samples to a number of bytes + * @s: COMEDI subdevice. + * @nsamples: Number of samples. + * + * Return: The number of samples multiplied by the subdevice sample size. + * (Does not check for arithmetic overflow.) + */ +static inline unsigned int comedi_samples_to_bytes(struct comedi_subdevice *s, + unsigned int nsamples) +{ + return nsamples << comedi_sample_shift(s); +} + +/** + * comedi_check_trigger_src() - Trivially validate a comedi_cmd trigger source + * @src: Pointer to the trigger source to validate. + * @flags: Bitmask of valid %TRIG_* for the trigger. + * + * This is used in "step 1" of the do_cmdtest functions of comedi drivers + * to validate the comedi_cmd triggers. The mask of the @src against the + * @flags allows the userspace comedilib to pass all the comedi_cmd + * triggers as %TRIG_ANY and get back a bitmask of the valid trigger sources. + * + * Return: + * 0 if trigger sources in *@src are all supported. + * -EINVAL if any trigger source in *@src is unsupported. + */ +static inline int comedi_check_trigger_src(unsigned int *src, + unsigned int flags) +{ + unsigned int orig_src = *src; + + *src = orig_src & flags; + if (*src == TRIG_INVALID || *src != orig_src) + return -EINVAL; + return 0; +} + +/** + * comedi_check_trigger_is_unique() - Make sure a trigger source is unique + * @src: The trigger source to check. + * + * Return: + * 0 if no more than one trigger source is set. + * -EINVAL if more than one trigger source is set. + */ +static inline int comedi_check_trigger_is_unique(unsigned int src) +{ + /* this test is true if more than one _src bit is set */ + if ((src & (src - 1)) != 0) + return -EINVAL; + return 0; +} + +/** + * comedi_check_trigger_arg_is() - Trivially validate a trigger argument + * @arg: Pointer to the trigger arg to validate. + * @val: The value the argument should be. + * + * Forces *@arg to be @val. + * + * Return: + * 0 if *@arg was already @val. + * -EINVAL if *@arg differed from @val. + */ +static inline int comedi_check_trigger_arg_is(unsigned int *arg, + unsigned int val) +{ + if (*arg != val) { + *arg = val; + return -EINVAL; + } + return 0; +} + +/** + * comedi_check_trigger_arg_min() - Trivially validate a trigger argument min + * @arg: Pointer to the trigger arg to validate. + * @val: The minimum value the argument should be. + * + * Forces *@arg to be at least @val, setting it to @val if necessary. + * + * Return: + * 0 if *@arg was already at least @val. + * -EINVAL if *@arg was less than @val. + */ +static inline int comedi_check_trigger_arg_min(unsigned int *arg, + unsigned int val) +{ + if (*arg < val) { + *arg = val; + return -EINVAL; + } + return 0; +} + +/** + * comedi_check_trigger_arg_max() - Trivially validate a trigger argument max + * @arg: Pointer to the trigger arg to validate. + * @val: The maximum value the argument should be. + * + * Forces *@arg to be no more than @val, setting it to @val if necessary. + * + * Return: + * 0 if*@arg was already no more than @val. + * -EINVAL if *@arg was greater than @val. + */ +static inline int comedi_check_trigger_arg_max(unsigned int *arg, + unsigned int val) +{ + if (*arg > val) { + *arg = val; + return -EINVAL; + } + return 0; +} + +/* + * Must set dev->hw_dev if you wish to dma directly into comedi's buffer. + * Also useful for retrieving a previously configured hardware device of + * known bus type. Set automatically for auto-configured devices. + * Automatically set to NULL when detaching hardware device. + */ +int comedi_set_hw_dev(struct comedi_device *dev, struct device *hw_dev); + +/** + * comedi_buf_n_bytes_ready - Determine amount of unread data in buffer + * @s: COMEDI subdevice. + * + * Determines the number of bytes of unread data in the asynchronous + * acquisition data buffer for a subdevice. The data in question might not + * have been fully "munged" yet. + * + * Returns: The amount of unread data in bytes. + */ +static inline unsigned int comedi_buf_n_bytes_ready(struct comedi_subdevice *s) +{ + return s->async->buf_write_count - s->async->buf_read_count; +} + +unsigned int comedi_buf_write_alloc(struct comedi_subdevice *s, unsigned int n); +unsigned int comedi_buf_write_free(struct comedi_subdevice *s, unsigned int n); + +unsigned int comedi_buf_read_n_available(struct comedi_subdevice *s); +unsigned int comedi_buf_read_alloc(struct comedi_subdevice *s, unsigned int n); +unsigned int comedi_buf_read_free(struct comedi_subdevice *s, unsigned int n); + +unsigned int comedi_buf_write_samples(struct comedi_subdevice *s, + const void *data, unsigned int nsamples); +unsigned int comedi_buf_read_samples(struct comedi_subdevice *s, + void *data, unsigned int nsamples); + +/* drivers.c - general comedi driver functions */ + +#define COMEDI_TIMEOUT_MS 1000 + +int comedi_timeout(struct comedi_device *dev, struct comedi_subdevice *s, + struct comedi_insn *insn, + int (*cb)(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned long context), + unsigned long context); + +unsigned int comedi_handle_events(struct comedi_device *dev, + struct comedi_subdevice *s); + +int comedi_dio_insn_config(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data, + unsigned int mask); +unsigned int comedi_dio_update_state(struct comedi_subdevice *s, + unsigned int *data); +unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s, + struct comedi_cmd *cmd); +unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s); +unsigned int comedi_nscans_left(struct comedi_subdevice *s, + unsigned int nscans); +unsigned int comedi_nsamples_left(struct comedi_subdevice *s, + unsigned int nsamples); +void comedi_inc_scan_progress(struct comedi_subdevice *s, + unsigned int num_bytes); + +void *comedi_alloc_devpriv(struct comedi_device *dev, size_t size); +int comedi_alloc_subdevices(struct comedi_device *dev, int num_subdevices); +int comedi_alloc_subdev_readback(struct comedi_subdevice *s); + +int comedi_readback_insn_read(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); + +int comedi_load_firmware(struct comedi_device *dev, struct device *hw_dev, + const char *name, + int (*cb)(struct comedi_device *dev, + const u8 *data, size_t size, + unsigned long context), + unsigned long context); + +int __comedi_request_region(struct comedi_device *dev, + unsigned long start, unsigned long len); +int comedi_request_region(struct comedi_device *dev, + unsigned long start, unsigned long len); +void comedi_legacy_detach(struct comedi_device *dev); + +int comedi_auto_config(struct device *hardware_device, + struct comedi_driver *driver, unsigned long context); +void comedi_auto_unconfig(struct device *hardware_device); + +int comedi_driver_register(struct comedi_driver *driver); +void comedi_driver_unregister(struct comedi_driver *driver); + +/** + * module_comedi_driver() - Helper macro for registering a comedi driver + * @__comedi_driver: comedi_driver struct + * + * Helper macro for comedi drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only use + * this macro once, and calling it replaces module_init() and module_exit(). + */ +#define module_comedi_driver(__comedi_driver) \ + module_driver(__comedi_driver, comedi_driver_register, \ + comedi_driver_unregister) + +#endif /* _COMEDIDEV_H */ diff --git a/include/linux/comedi/comedilib.h b/include/linux/comedi/comedilib.h new file mode 100644 index 000000000000..0223c9cd9215 --- /dev/null +++ b/include/linux/comedi/comedilib.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedilib.h + * Header file for kcomedilib + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1998-2001 David A. Schleef + */ + +#ifndef _LINUX_COMEDILIB_H +#define _LINUX_COMEDILIB_H + +struct comedi_device *comedi_open(const char *path); +int comedi_close(struct comedi_device *dev); +int comedi_dio_get_config(struct comedi_device *dev, unsigned int subdev, + unsigned int chan, unsigned int *io); +int comedi_dio_config(struct comedi_device *dev, unsigned int subdev, + unsigned int chan, unsigned int io); +int comedi_dio_bitfield2(struct comedi_device *dev, unsigned int subdev, + unsigned int mask, unsigned int *bits, + unsigned int base_channel); +int comedi_find_subdevice_by_type(struct comedi_device *dev, int type, + unsigned int subd); +int comedi_get_n_channels(struct comedi_device *dev, unsigned int subdevice); + +#endif diff --git a/include/uapi/linux/comedi.h b/include/uapi/linux/comedi.h new file mode 100644 index 000000000000..7314e5ee0a1e --- /dev/null +++ b/include/uapi/linux/comedi.h @@ -0,0 +1,1528 @@ +/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */ +/* + * comedi.h + * header file for COMEDI user API + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1998-2001 David A. Schleef + */ + +#ifndef _COMEDI_H +#define _COMEDI_H + +#define COMEDI_MAJORVERSION 0 +#define COMEDI_MINORVERSION 7 +#define COMEDI_MICROVERSION 76 +#define VERSION "0.7.76" + +/* comedi's major device number */ +#define COMEDI_MAJOR 98 + +/* + * maximum number of minor devices. This can be increased, although + * kernel structures are currently statically allocated, thus you + * don't want this to be much more than you actually use. + */ +#define COMEDI_NDEVICES 16 + +/* number of config options in the config structure */ +#define COMEDI_NDEVCONFOPTS 32 + +/* + * NOTE: 'comedi_config --init-data' is deprecated + * + * The following indexes in the config options were used by + * comedi_config to pass firmware blobs from user space to the + * comedi drivers. The request_firmware() hotplug interface is + * now used by all comedi drivers instead. + */ + +/* length of nth chunk of firmware data -*/ +#define COMEDI_DEVCONF_AUX_DATA3_LENGTH 25 +#define COMEDI_DEVCONF_AUX_DATA2_LENGTH 26 +#define COMEDI_DEVCONF_AUX_DATA1_LENGTH 27 +#define COMEDI_DEVCONF_AUX_DATA0_LENGTH 28 +/* most significant 32 bits of pointer address (if needed) */ +#define COMEDI_DEVCONF_AUX_DATA_HI 29 +/* least significant 32 bits of pointer address */ +#define COMEDI_DEVCONF_AUX_DATA_LO 30 +#define COMEDI_DEVCONF_AUX_DATA_LENGTH 31 /* total data length */ + +/* max length of device and driver names */ +#define COMEDI_NAMELEN 20 + +/* packs and unpacks a channel/range number */ + +#define CR_PACK(chan, rng, aref) \ + ((((aref) & 0x3) << 24) | (((rng) & 0xff) << 16) | (chan)) +#define CR_PACK_FLAGS(chan, range, aref, flags) \ + (CR_PACK(chan, range, aref) | ((flags) & CR_FLAGS_MASK)) + +#define CR_CHAN(a) ((a) & 0xffff) +#define CR_RANGE(a) (((a) >> 16) & 0xff) +#define CR_AREF(a) (((a) >> 24) & 0x03) + +#define CR_FLAGS_MASK 0xfc000000 +#define CR_ALT_FILTER 0x04000000 +#define CR_DITHER CR_ALT_FILTER +#define CR_DEGLITCH CR_ALT_FILTER +#define CR_ALT_SOURCE 0x08000000 +#define CR_EDGE 0x40000000 +#define CR_INVERT 0x80000000 + +#define AREF_GROUND 0x00 /* analog ref = analog ground */ +#define AREF_COMMON 0x01 /* analog ref = analog common */ +#define AREF_DIFF 0x02 /* analog ref = differential */ +#define AREF_OTHER 0x03 /* analog ref = other (undefined) */ + +/* counters -- these are arbitrary values */ +#define GPCT_RESET 0x0001 +#define GPCT_SET_SOURCE 0x0002 +#define GPCT_SET_GATE 0x0004 +#define GPCT_SET_DIRECTION 0x0008 +#define GPCT_SET_OPERATION 0x0010 +#define GPCT_ARM 0x0020 +#define GPCT_DISARM 0x0040 +#define GPCT_GET_INT_CLK_FRQ 0x0080 + +#define GPCT_INT_CLOCK 0x0001 +#define GPCT_EXT_PIN 0x0002 +#define GPCT_NO_GATE 0x0004 +#define GPCT_UP 0x0008 +#define GPCT_DOWN 0x0010 +#define GPCT_HWUD 0x0020 +#define GPCT_SIMPLE_EVENT 0x0040 +#define GPCT_SINGLE_PERIOD 0x0080 +#define GPCT_SINGLE_PW 0x0100 +#define GPCT_CONT_PULSE_OUT 0x0200 +#define GPCT_SINGLE_PULSE_OUT 0x0400 + +/* instructions */ + +#define INSN_MASK_WRITE 0x8000000 +#define INSN_MASK_READ 0x4000000 +#define INSN_MASK_SPECIAL 0x2000000 + +#define INSN_READ (0 | INSN_MASK_READ) +#define INSN_WRITE (1 | INSN_MASK_WRITE) +#define INSN_BITS (2 | INSN_MASK_READ | INSN_MASK_WRITE) +#define INSN_CONFIG (3 | INSN_MASK_READ | INSN_MASK_WRITE) +#define INSN_DEVICE_CONFIG (INSN_CONFIG | INSN_MASK_SPECIAL) +#define INSN_GTOD (4 | INSN_MASK_READ | INSN_MASK_SPECIAL) +#define INSN_WAIT (5 | INSN_MASK_WRITE | INSN_MASK_SPECIAL) +#define INSN_INTTRIG (6 | INSN_MASK_WRITE | INSN_MASK_SPECIAL) + +/* command flags */ +/* These flags are used in comedi_cmd structures */ + +#define CMDF_BOGUS 0x00000001 /* do the motions */ + +/* try to use a real-time interrupt while performing command */ +#define CMDF_PRIORITY 0x00000008 + +/* wake up on end-of-scan events */ +#define CMDF_WAKE_EOS 0x00000020 + +#define CMDF_WRITE 0x00000040 + +#define CMDF_RAWDATA 0x00000080 + +/* timer rounding definitions */ +#define CMDF_ROUND_MASK 0x00030000 +#define CMDF_ROUND_NEAREST 0x00000000 +#define CMDF_ROUND_DOWN 0x00010000 +#define CMDF_ROUND_UP 0x00020000 +#define CMDF_ROUND_UP_NEXT 0x00030000 + +#define COMEDI_EV_START 0x00040000 +#define COMEDI_EV_SCAN_BEGIN 0x00080000 +#define COMEDI_EV_CONVERT 0x00100000 +#define COMEDI_EV_SCAN_END 0x00200000 +#define COMEDI_EV_STOP 0x00400000 + +/* compatibility definitions */ +#define TRIG_BOGUS CMDF_BOGUS +#define TRIG_RT CMDF_PRIORITY +#define TRIG_WAKE_EOS CMDF_WAKE_EOS +#define TRIG_WRITE CMDF_WRITE +#define TRIG_ROUND_MASK CMDF_ROUND_MASK +#define TRIG_ROUND_NEAREST CMDF_ROUND_NEAREST +#define TRIG_ROUND_DOWN CMDF_ROUND_DOWN +#define TRIG_ROUND_UP CMDF_ROUND_UP +#define TRIG_ROUND_UP_NEXT CMDF_ROUND_UP_NEXT + +/* trigger sources */ + +#define TRIG_ANY 0xffffffff +#define TRIG_INVALID 0x00000000 + +#define TRIG_NONE 0x00000001 /* never trigger */ +#define TRIG_NOW 0x00000002 /* trigger now + N ns */ +#define TRIG_FOLLOW 0x00000004 /* trigger on next lower level trig */ +#define TRIG_TIME 0x00000008 /* trigger at time N ns */ +#define TRIG_TIMER 0x00000010 /* trigger at rate N ns */ +#define TRIG_COUNT 0x00000020 /* trigger when count reaches N */ +#define TRIG_EXT 0x00000040 /* trigger on external signal N */ +#define TRIG_INT 0x00000080 /* trigger on comedi-internal signal N */ +#define TRIG_OTHER 0x00000100 /* driver defined */ + +/* subdevice flags */ + +#define SDF_BUSY 0x0001 /* device is busy */ +#define SDF_BUSY_OWNER 0x0002 /* device is busy with your job */ +#define SDF_LOCKED 0x0004 /* subdevice is locked */ +#define SDF_LOCK_OWNER 0x0008 /* you own lock */ +#define SDF_MAXDATA 0x0010 /* maxdata depends on channel */ +#define SDF_FLAGS 0x0020 /* flags depend on channel */ +#define SDF_RANGETYPE 0x0040 /* range type depends on channel */ +#define SDF_PWM_COUNTER 0x0080 /* PWM can automatically switch off */ +#define SDF_PWM_HBRIDGE 0x0100 /* PWM is signed (H-bridge) */ +#define SDF_CMD 0x1000 /* can do commands (deprecated) */ +#define SDF_SOFT_CALIBRATED 0x2000 /* subdevice uses software calibration */ +#define SDF_CMD_WRITE 0x4000 /* can do output commands */ +#define SDF_CMD_READ 0x8000 /* can do input commands */ + +/* subdevice can be read (e.g. analog input) */ +#define SDF_READABLE 0x00010000 +/* subdevice can be written (e.g. analog output) */ +#define SDF_WRITABLE 0x00020000 +#define SDF_WRITEABLE SDF_WRITABLE /* spelling error in API */ +/* subdevice does not have externally visible lines */ +#define SDF_INTERNAL 0x00040000 +#define SDF_GROUND 0x00100000 /* can do aref=ground */ +#define SDF_COMMON 0x00200000 /* can do aref=common */ +#define SDF_DIFF 0x00400000 /* can do aref=diff */ +#define SDF_OTHER 0x00800000 /* can do aref=other */ +#define SDF_DITHER 0x01000000 /* can do dithering */ +#define SDF_DEGLITCH 0x02000000 /* can do deglitching */ +#define SDF_MMAP 0x04000000 /* can do mmap() */ +#define SDF_RUNNING 0x08000000 /* subdevice is acquiring data */ +#define SDF_LSAMPL 0x10000000 /* subdevice uses 32-bit samples */ +#define SDF_PACKED 0x20000000 /* subdevice can do packed DIO */ + +/* subdevice types */ + +/** + * enum comedi_subdevice_type - COMEDI subdevice types + * @COMEDI_SUBD_UNUSED: Unused subdevice. + * @COMEDI_SUBD_AI: Analog input. + * @COMEDI_SUBD_AO: Analog output. + * @COMEDI_SUBD_DI: Digital input. + * @COMEDI_SUBD_DO: Digital output. + * @COMEDI_SUBD_DIO: Digital input/output. + * @COMEDI_SUBD_COUNTER: Counter. + * @COMEDI_SUBD_TIMER: Timer. + * @COMEDI_SUBD_MEMORY: Memory, EEPROM, DPRAM. + * @COMEDI_SUBD_CALIB: Calibration DACs. + * @COMEDI_SUBD_PROC: Processor, DSP. + * @COMEDI_SUBD_SERIAL: Serial I/O. + * @COMEDI_SUBD_PWM: Pulse-Width Modulation output. + */ +enum comedi_subdevice_type { + COMEDI_SUBD_UNUSED, + COMEDI_SUBD_AI, + COMEDI_SUBD_AO, + COMEDI_SUBD_DI, + COMEDI_SUBD_DO, + COMEDI_SUBD_DIO, + COMEDI_SUBD_COUNTER, + COMEDI_SUBD_TIMER, + COMEDI_SUBD_MEMORY, + COMEDI_SUBD_CALIB, + COMEDI_SUBD_PROC, + COMEDI_SUBD_SERIAL, + COMEDI_SUBD_PWM +}; + +/* configuration instructions */ + +/** + * enum comedi_io_direction - COMEDI I/O directions + * @COMEDI_INPUT: Input. + * @COMEDI_OUTPUT: Output. + * @COMEDI_OPENDRAIN: Open-drain (or open-collector) output. + * + * These are used by the %INSN_CONFIG_DIO_QUERY configuration instruction to + * report a direction. They may also be used in other places where a direction + * needs to be specified. + */ +enum comedi_io_direction { + COMEDI_INPUT = 0, + COMEDI_OUTPUT = 1, + COMEDI_OPENDRAIN = 2 +}; + +/** + * enum configuration_ids - COMEDI configuration instruction codes + * @INSN_CONFIG_DIO_INPUT: Configure digital I/O as input. + * @INSN_CONFIG_DIO_OUTPUT: Configure digital I/O as output. + * @INSN_CONFIG_DIO_OPENDRAIN: Configure digital I/O as open-drain (or open + * collector) output. + * @INSN_CONFIG_ANALOG_TRIG: Configure analog trigger. + * @INSN_CONFIG_ALT_SOURCE: Configure alternate input source. + * @INSN_CONFIG_DIGITAL_TRIG: Configure digital trigger. + * @INSN_CONFIG_BLOCK_SIZE: Configure block size for DMA transfers. + * @INSN_CONFIG_TIMER_1: Configure divisor for external clock. + * @INSN_CONFIG_FILTER: Configure a filter. + * @INSN_CONFIG_CHANGE_NOTIFY: Configure change notification for digital + * inputs. (New drivers should use + * %INSN_CONFIG_DIGITAL_TRIG instead.) + * @INSN_CONFIG_SERIAL_CLOCK: Configure clock for serial I/O. + * @INSN_CONFIG_BIDIRECTIONAL_DATA: Send and receive byte over serial I/O. + * @INSN_CONFIG_DIO_QUERY: Query direction of digital I/O channel. + * @INSN_CONFIG_PWM_OUTPUT: Configure pulse-width modulator output. + * @INSN_CONFIG_GET_PWM_OUTPUT: Get pulse-width modulator output configuration. + * @INSN_CONFIG_ARM: Arm a subdevice or channel. + * @INSN_CONFIG_DISARM: Disarm a subdevice or channel. + * @INSN_CONFIG_GET_COUNTER_STATUS: Get counter status. + * @INSN_CONFIG_RESET: Reset a subdevice or channel. + * @INSN_CONFIG_GPCT_SINGLE_PULSE_GENERATOR: Configure counter/timer as + * single pulse generator. + * @INSN_CONFIG_GPCT_PULSE_TRAIN_GENERATOR: Configure counter/timer as + * pulse train generator. + * @INSN_CONFIG_GPCT_QUADRATURE_ENCODER: Configure counter as a quadrature + * encoder. + * @INSN_CONFIG_SET_GATE_SRC: Set counter/timer gate source. + * @INSN_CONFIG_GET_GATE_SRC: Get counter/timer gate source. + * @INSN_CONFIG_SET_CLOCK_SRC: Set counter/timer master clock source. + * @INSN_CONFIG_GET_CLOCK_SRC: Get counter/timer master clock source. + * @INSN_CONFIG_SET_OTHER_SRC: Set counter/timer "other" source. + * @INSN_CONFIG_GET_HARDWARE_BUFFER_SIZE: Get size (in bytes) of subdevice's + * on-board FIFOs used during streaming + * input/output. + * @INSN_CONFIG_SET_COUNTER_MODE: Set counter/timer mode. + * @INSN_CONFIG_8254_SET_MODE: (Deprecated) Same as + * %INSN_CONFIG_SET_COUNTER_MODE. + * @INSN_CONFIG_8254_READ_STATUS: Read status of 8254 counter channel. + * @INSN_CONFIG_SET_ROUTING: Set routing for a channel. + * @INSN_CONFIG_GET_ROUTING: Get routing for a channel. + * @INSN_CONFIG_PWM_SET_PERIOD: Set PWM period in nanoseconds. + * @INSN_CONFIG_PWM_GET_PERIOD: Get PWM period in nanoseconds. + * @INSN_CONFIG_GET_PWM_STATUS: Get PWM status. + * @INSN_CONFIG_PWM_SET_H_BRIDGE: Set PWM H bridge duty cycle and polarity for + * a relay simultaneously. + * @INSN_CONFIG_PWM_GET_H_BRIDGE: Get PWM H bridge duty cycle and polarity. + * @INSN_CONFIG_GET_CMD_TIMING_CONSTRAINTS: Get the hardware timing restraints, + * regardless of trigger sources. + */ +enum configuration_ids { + INSN_CONFIG_DIO_INPUT = COMEDI_INPUT, + INSN_CONFIG_DIO_OUTPUT = COMEDI_OUTPUT, + INSN_CONFIG_DIO_OPENDRAIN = COMEDI_OPENDRAIN, + INSN_CONFIG_ANALOG_TRIG = 16, +/* INSN_CONFIG_WAVEFORM = 17, */ +/* INSN_CONFIG_TRIG = 18, */ +/* INSN_CONFIG_COUNTER = 19, */ + INSN_CONFIG_ALT_SOURCE = 20, + INSN_CONFIG_DIGITAL_TRIG = 21, + INSN_CONFIG_BLOCK_SIZE = 22, + INSN_CONFIG_TIMER_1 = 23, + INSN_CONFIG_FILTER = 24, + INSN_CONFIG_CHANGE_NOTIFY = 25, + + INSN_CONFIG_SERIAL_CLOCK = 26, /*ALPHA*/ + INSN_CONFIG_BIDIRECTIONAL_DATA = 27, + INSN_CONFIG_DIO_QUERY = 28, + INSN_CONFIG_PWM_OUTPUT = 29, + INSN_CONFIG_GET_PWM_OUTPUT = 30, + INSN_CONFIG_ARM = 31, + INSN_CONFIG_DISARM = 32, + INSN_CONFIG_GET_COUNTER_STATUS = 33, + INSN_CONFIG_RESET = 34, + INSN_CONFIG_GPCT_SINGLE_PULSE_GENERATOR = 1001, + INSN_CONFIG_GPCT_PULSE_TRAIN_GENERATOR = 1002, + INSN_CONFIG_GPCT_QUADRATURE_ENCODER = 1003, + INSN_CONFIG_SET_GATE_SRC = 2001, + INSN_CONFIG_GET_GATE_SRC = 2002, + INSN_CONFIG_SET_CLOCK_SRC = 2003, + INSN_CONFIG_GET_CLOCK_SRC = 2004, + INSN_CONFIG_SET_OTHER_SRC = 2005, + INSN_CONFIG_GET_HARDWARE_BUFFER_SIZE = 2006, + INSN_CONFIG_SET_COUNTER_MODE = 4097, + INSN_CONFIG_8254_SET_MODE = INSN_CONFIG_SET_COUNTER_MODE, + INSN_CONFIG_8254_READ_STATUS = 4098, + INSN_CONFIG_SET_ROUTING = 4099, + INSN_CONFIG_GET_ROUTING = 4109, + INSN_CONFIG_PWM_SET_PERIOD = 5000, + INSN_CONFIG_PWM_GET_PERIOD = 5001, + INSN_CONFIG_GET_PWM_STATUS = 5002, + INSN_CONFIG_PWM_SET_H_BRIDGE = 5003, + INSN_CONFIG_PWM_GET_H_BRIDGE = 5004, + INSN_CONFIG_GET_CMD_TIMING_CONSTRAINTS = 5005, +}; + +/** + * enum device_configuration_ids - COMEDI configuration instruction codes global + * to an entire device. + * @INSN_DEVICE_CONFIG_TEST_ROUTE: Validate the possibility of a + * globally-named route + * @INSN_DEVICE_CONFIG_CONNECT_ROUTE: Connect a globally-named route + * @INSN_DEVICE_CONFIG_DISCONNECT_ROUTE:Disconnect a globally-named route + * @INSN_DEVICE_CONFIG_GET_ROUTES: Get a list of all globally-named routes + * that are valid for a particular device. + */ +enum device_config_route_ids { + INSN_DEVICE_CONFIG_TEST_ROUTE = 0, + INSN_DEVICE_CONFIG_CONNECT_ROUTE = 1, + INSN_DEVICE_CONFIG_DISCONNECT_ROUTE = 2, + INSN_DEVICE_CONFIG_GET_ROUTES = 3, +}; + +/** + * enum comedi_digital_trig_op - operations for configuring a digital trigger + * @COMEDI_DIGITAL_TRIG_DISABLE: Return digital trigger to its default, + * inactive, unconfigured state. + * @COMEDI_DIGITAL_TRIG_ENABLE_EDGES: Set rising and/or falling edge inputs + * that each can fire the trigger. + * @COMEDI_DIGITAL_TRIG_ENABLE_LEVELS: Set a combination of high and/or low + * level inputs that can fire the trigger. + * + * These are used with the %INSN_CONFIG_DIGITAL_TRIG configuration instruction. + * The data for the configuration instruction is as follows... + * + * data[%0] = %INSN_CONFIG_DIGITAL_TRIG + * + * data[%1] = trigger ID + * + * data[%2] = configuration operation + * + * data[%3] = configuration parameter 1 + * + * data[%4] = configuration parameter 2 + * + * data[%5] = configuration parameter 3 + * + * The trigger ID (data[%1]) is used to differentiate multiple digital triggers + * belonging to the same subdevice. The configuration operation (data[%2]) is + * one of the enum comedi_digital_trig_op values. The configuration + * parameters (data[%3], data[%4], and data[%5]) depend on the operation; they + * are not used with %COMEDI_DIGITAL_TRIG_DISABLE. + * + * For %COMEDI_DIGITAL_TRIG_ENABLE_EDGES and %COMEDI_DIGITAL_TRIG_ENABLE_LEVELS, + * configuration parameter 1 (data[%3]) contains a "left-shift" value that + * specifies the input corresponding to bit 0 of configuration parameters 2 + * and 3. This is useful if the trigger has more than 32 inputs. + * + * For %COMEDI_DIGITAL_TRIG_ENABLE_EDGES, configuration parameter 2 (data[%4]) + * specifies which of up to 32 inputs have rising-edge sensitivity, and + * configuration parameter 3 (data[%5]) specifies which of up to 32 inputs + * have falling-edge sensitivity that can fire the trigger. + * + * For %COMEDI_DIGITAL_TRIG_ENABLE_LEVELS, configuration parameter 2 (data[%4]) + * specifies which of up to 32 inputs must be at a high level, and + * configuration parameter 3 (data[%5]) specifies which of up to 32 inputs + * must be at a low level for the trigger to fire. + * + * Some sequences of %INSN_CONFIG_DIGITAL_TRIG instructions may have a (partly) + * accumulative effect, depending on the low-level driver. This is useful + * when setting up a trigger that has more than 32 inputs, or has a combination + * of edge- and level-triggered inputs. + */ +enum comedi_digital_trig_op { + COMEDI_DIGITAL_TRIG_DISABLE = 0, + COMEDI_DIGITAL_TRIG_ENABLE_EDGES = 1, + COMEDI_DIGITAL_TRIG_ENABLE_LEVELS = 2 +}; + +/** + * enum comedi_support_level - support level for a COMEDI feature + * @COMEDI_UNKNOWN_SUPPORT: Unspecified support for feature. + * @COMEDI_SUPPORTED: Feature is supported. + * @COMEDI_UNSUPPORTED: Feature is unsupported. + */ +enum comedi_support_level { + COMEDI_UNKNOWN_SUPPORT = 0, + COMEDI_SUPPORTED, + COMEDI_UNSUPPORTED +}; + +/** + * enum comedi_counter_status_flags - counter status bits + * @COMEDI_COUNTER_ARMED: Counter is armed. + * @COMEDI_COUNTER_COUNTING: Counter is counting. + * @COMEDI_COUNTER_TERMINAL_COUNT: Counter reached terminal count. + * + * These bitwise values are used by the %INSN_CONFIG_GET_COUNTER_STATUS + * configuration instruction to report the status of a counter. + */ +enum comedi_counter_status_flags { + COMEDI_COUNTER_ARMED = 0x1, + COMEDI_COUNTER_COUNTING = 0x2, + COMEDI_COUNTER_TERMINAL_COUNT = 0x4, +}; + +/* ioctls */ + +#define CIO 'd' +#define COMEDI_DEVCONFIG _IOW(CIO, 0, struct comedi_devconfig) +#define COMEDI_DEVINFO _IOR(CIO, 1, struct comedi_devinfo) +#define COMEDI_SUBDINFO _IOR(CIO, 2, struct comedi_subdinfo) +#define COMEDI_CHANINFO _IOR(CIO, 3, struct comedi_chaninfo) +/* _IOWR(CIO, 4, ...) is reserved */ +#define COMEDI_LOCK _IO(CIO, 5) +#define COMEDI_UNLOCK _IO(CIO, 6) +#define COMEDI_CANCEL _IO(CIO, 7) +#define COMEDI_RANGEINFO _IOR(CIO, 8, struct comedi_rangeinfo) +#define COMEDI_CMD _IOR(CIO, 9, struct comedi_cmd) +#define COMEDI_CMDTEST _IOR(CIO, 10, struct comedi_cmd) +#define COMEDI_INSNLIST _IOR(CIO, 11, struct comedi_insnlist) +#define COMEDI_INSN _IOR(CIO, 12, struct comedi_insn) +#define COMEDI_BUFCONFIG _IOR(CIO, 13, struct comedi_bufconfig) +#define COMEDI_BUFINFO _IOWR(CIO, 14, struct comedi_bufinfo) +#define COMEDI_POLL _IO(CIO, 15) +#define COMEDI_SETRSUBD _IO(CIO, 16) +#define COMEDI_SETWSUBD _IO(CIO, 17) + +/* structures */ + +/** + * struct comedi_insn - COMEDI instruction + * @insn: COMEDI instruction type (%INSN_xxx). + * @n: Length of @data[]. + * @data: Pointer to data array operated on by the instruction. + * @subdev: Subdevice index. + * @chanspec: A packed "chanspec" value consisting of channel number, + * analog range index, analog reference type, and flags. + * @unused: Reserved for future use. + * + * This is used with the %COMEDI_INSN ioctl, and indirectly with the + * %COMEDI_INSNLIST ioctl. + */ +struct comedi_insn { + unsigned int insn; + unsigned int n; + unsigned int __user *data; + unsigned int subdev; + unsigned int chanspec; + unsigned int unused[3]; +}; + +/** + * struct comedi_insnlist - list of COMEDI instructions + * @n_insns: Number of COMEDI instructions. + * @insns: Pointer to array COMEDI instructions. + * + * This is used with the %COMEDI_INSNLIST ioctl. + */ +struct comedi_insnlist { + unsigned int n_insns; + struct comedi_insn __user *insns; +}; + +/** + * struct comedi_cmd - COMEDI asynchronous acquisition command details + * @subdev: Subdevice index. + * @flags: Command flags (%CMDF_xxx). + * @start_src: "Start acquisition" trigger source (%TRIG_xxx). + * @start_arg: "Start acquisition" trigger argument. + * @scan_begin_src: "Scan begin" trigger source. + * @scan_begin_arg: "Scan begin" trigger argument. + * @convert_src: "Convert" trigger source. + * @convert_arg: "Convert" trigger argument. + * @scan_end_src: "Scan end" trigger source. + * @scan_end_arg: "Scan end" trigger argument. + * @stop_src: "Stop acquisition" trigger source. + * @stop_arg: "Stop acquisition" trigger argument. + * @chanlist: Pointer to array of "chanspec" values, containing a + * sequence of channel numbers packed with analog range + * index, etc. + * @chanlist_len: Number of channels in sequence. + * @data: Pointer to miscellaneous set-up data (not used). + * @data_len: Length of miscellaneous set-up data. + * + * This is used with the %COMEDI_CMD or %COMEDI_CMDTEST ioctl to set-up + * or validate an asynchronous acquisition command. The ioctl may modify + * the &struct comedi_cmd and copy it back to the caller. + * + * Optional command @flags values that can be ORed together... + * + * %CMDF_BOGUS - makes %COMEDI_CMD ioctl return error %EAGAIN instead of + * starting the command. + * + * %CMDF_PRIORITY - requests "hard real-time" processing (which is not + * supported in this version of COMEDI). + * + * %CMDF_WAKE_EOS - requests the command makes data available for reading + * after every "scan" period. + * + * %CMDF_WRITE - marks the command as being in the "write" (to device) + * direction. This does not need to be specified by the caller unless the + * subdevice supports commands in either direction. + * + * %CMDF_RAWDATA - prevents the command from "munging" the data between the + * COMEDI sample format and the raw hardware sample format. + * + * %CMDF_ROUND_NEAREST - requests timing periods to be rounded to nearest + * supported values. + * + * %CMDF_ROUND_DOWN - requests timing periods to be rounded down to supported + * values (frequencies rounded up). + * + * %CMDF_ROUND_UP - requests timing periods to be rounded up to supported + * values (frequencies rounded down). + * + * Trigger source values for @start_src, @scan_begin_src, @convert_src, + * @scan_end_src, and @stop_src... + * + * %TRIG_ANY - "all ones" value used to test which trigger sources are + * supported. + * + * %TRIG_INVALID - "all zeroes" value used to indicate that all requested + * trigger sources are invalid. + * + * %TRIG_NONE - never trigger (often used as a @stop_src value). + * + * %TRIG_NOW - trigger after '_arg' nanoseconds. + * + * %TRIG_FOLLOW - trigger follows another event. + * + * %TRIG_TIMER - trigger every '_arg' nanoseconds. + * + * %TRIG_COUNT - trigger when count '_arg' is reached. + * + * %TRIG_EXT - trigger on external signal specified by '_arg'. + * + * %TRIG_INT - trigger on internal, software trigger specified by '_arg'. + * + * %TRIG_OTHER - trigger on other, driver-defined signal specified by '_arg'. + */ +struct comedi_cmd { + unsigned int subdev; + unsigned int flags; + + unsigned int start_src; + unsigned int start_arg; + + unsigned int scan_begin_src; + unsigned int scan_begin_arg; + + unsigned int convert_src; + unsigned int convert_arg; + + unsigned int scan_end_src; + unsigned int scan_end_arg; + + unsigned int stop_src; + unsigned int stop_arg; + + unsigned int *chanlist; + unsigned int chanlist_len; + + short __user *data; + unsigned int data_len; +}; + +/** + * struct comedi_chaninfo - used to retrieve per-channel information + * @subdev: Subdevice index. + * @maxdata_list: Optional pointer to per-channel maximum data values. + * @flaglist: Optional pointer to per-channel flags. + * @rangelist: Optional pointer to per-channel range types. + * @unused: Reserved for future use. + * + * This is used with the %COMEDI_CHANINFO ioctl to get per-channel information + * for the subdevice. Use of this requires knowledge of the number of channels + * and subdevice flags obtained using the %COMEDI_SUBDINFO ioctl. + * + * The @maxdata_list member must be %NULL unless the %SDF_MAXDATA subdevice + * flag is set. The @flaglist member must be %NULL unless the %SDF_FLAGS + * subdevice flag is set. The @rangelist member must be %NULL unless the + * %SDF_RANGETYPE subdevice flag is set. Otherwise, the arrays they point to + * must be at least as long as the number of channels. + */ +struct comedi_chaninfo { + unsigned int subdev; + unsigned int __user *maxdata_list; + unsigned int __user *flaglist; + unsigned int __user *rangelist; + unsigned int unused[4]; +}; + +/** + * struct comedi_rangeinfo - used to retrieve the range table for a channel + * @range_type: Encodes subdevice index (bits 27:24), channel index + * (bits 23:16) and range table length (bits 15:0). + * @range_ptr: Pointer to array of @struct comedi_krange to be filled + * in with the range table for the channel or subdevice. + * + * This is used with the %COMEDI_RANGEINFO ioctl to retrieve the range table + * for a specific channel (if the subdevice has the %SDF_RANGETYPE flag set to + * indicate that the range table depends on the channel), or for the subdevice + * as a whole (if the %SDF_RANGETYPE flag is clear, indicating the range table + * is shared by all channels). + * + * The @range_type value is an input to the ioctl and comes from a previous + * use of the %COMEDI_SUBDINFO ioctl (if the %SDF_RANGETYPE flag is clear), + * or the %COMEDI_CHANINFO ioctl (if the %SDF_RANGETYPE flag is set). + */ +struct comedi_rangeinfo { + unsigned int range_type; + void __user *range_ptr; +}; + +/** + * struct comedi_krange - describes a range in a range table + * @min: Minimum value in millionths (1e-6) of a unit. + * @max: Maximum value in millionths (1e-6) of a unit. + * @flags: Indicates the units (in bits 7:0) OR'ed with optional flags. + * + * A range table is associated with a single channel, or with all channels in a + * subdevice, and a list of one or more ranges. A %struct comedi_krange + * describes the physical range of units for one of those ranges. Sample + * values in COMEDI are unsigned from %0 up to some 'maxdata' value. The + * mapping from sample values to physical units is assumed to be nomimally + * linear (for the purpose of describing the range), with sample value %0 + * mapping to @min, and the 'maxdata' sample value mapping to @max. + * + * The currently defined units are %UNIT_volt (%0), %UNIT_mA (%1), and + * %UNIT_none (%2). The @min and @max values are the physical range multiplied + * by 1e6, so a @max value of %1000000 (with %UNIT_volt) represents a maximal + * value of 1 volt. + * + * The only defined flag value is %RF_EXTERNAL (%0x100), indicating that the + * range needs to be multiplied by an external reference. + */ +struct comedi_krange { + int min; + int max; + unsigned int flags; +}; + +/** + * struct comedi_subdinfo - used to retrieve information about a subdevice + * @type: Type of subdevice from &enum comedi_subdevice_type. + * @n_chan: Number of channels the subdevice supports. + * @subd_flags: A mixture of static and dynamic flags describing + * aspects of the subdevice and its current state. + * @timer_type: Timer type. Always set to %5 ("nanosecond timer"). + * @len_chanlist: Maximum length of a channel list if the subdevice + * supports asynchronous acquisition commands. + * @maxdata: Maximum sample value for all channels if the + * %SDF_MAXDATA subdevice flag is clear. + * @flags: Channel flags for all channels if the %SDF_FLAGS + * subdevice flag is clear. + * @range_type: The range type for all channels if the %SDF_RANGETYPE + * subdevice flag is clear. Encodes the subdevice index + * (bits 27:24), a dummy channel index %0 (bits 23:16), + * and the range table length (bits 15:0). + * @settling_time_0: Not used. + * @insn_bits_support: Set to %COMEDI_SUPPORTED if the subdevice supports the + * %INSN_BITS instruction, or to %COMEDI_UNSUPPORTED if it + * does not. + * @unused: Reserved for future use. + * + * This is used with the %COMEDI_SUBDINFO ioctl which copies an array of + * &struct comedi_subdinfo back to user space, with one element per subdevice. + * Use of this requires knowledge of the number of subdevices obtained from + * the %COMEDI_DEVINFO ioctl. + * + * These are the @subd_flags values that may be ORed together... + * + * %SDF_BUSY - the subdevice is busy processing an asynchronous command or a + * synchronous instruction. + * + * %SDF_BUSY_OWNER - the subdevice is busy processing an asynchronous + * acquisition command started on the current file object (the file object + * issuing the %COMEDI_SUBDINFO ioctl). + * + * %SDF_LOCKED - the subdevice is locked by a %COMEDI_LOCK ioctl. + * + * %SDF_LOCK_OWNER - the subdevice is locked by a %COMEDI_LOCK ioctl from the + * current file object. + * + * %SDF_MAXDATA - maximum sample values are channel-specific. + * + * %SDF_FLAGS - channel flags are channel-specific. + * + * %SDF_RANGETYPE - range types are channel-specific. + * + * %SDF_PWM_COUNTER - PWM can switch off automatically. + * + * %SDF_PWM_HBRIDGE - or PWM is signed (H-bridge). + * + * %SDF_CMD - the subdevice supports asynchronous commands. + * + * %SDF_SOFT_CALIBRATED - the subdevice uses software calibration. + * + * %SDF_CMD_WRITE - the subdevice supports asynchronous commands in the output + * ("write") direction. + * + * %SDF_CMD_READ - the subdevice supports asynchronous commands in the input + * ("read") direction. + * + * %SDF_READABLE - the subdevice is readable (e.g. analog input). + * + * %SDF_WRITABLE (aliased as %SDF_WRITEABLE) - the subdevice is writable (e.g. + * analog output). + * + * %SDF_INTERNAL - the subdevice has no externally visible lines. + * + * %SDF_GROUND - the subdevice can use ground as an analog reference. + * + * %SDF_COMMON - the subdevice can use a common analog reference. + * + * %SDF_DIFF - the subdevice can use differential inputs (or outputs). + * + * %SDF_OTHER - the subdevice can use some other analog reference. + * + * %SDF_DITHER - the subdevice can do dithering. + * + * %SDF_DEGLITCH - the subdevice can do deglitching. + * + * %SDF_MMAP - this is never set. + * + * %SDF_RUNNING - an asynchronous command is still running. + * + * %SDF_LSAMPL - the subdevice uses "long" (32-bit) samples (for asynchronous + * command data). + * + * %SDF_PACKED - the subdevice packs several DIO samples into a single sample + * (for asynchronous command data). + * + * No "channel flags" (@flags) values are currently defined. + */ +struct comedi_subdinfo { + unsigned int type; + unsigned int n_chan; + unsigned int subd_flags; + unsigned int timer_type; + unsigned int len_chanlist; + unsigned int maxdata; + unsigned int flags; + unsigned int range_type; + unsigned int settling_time_0; + unsigned int insn_bits_support; + unsigned int unused[8]; +}; + +/** + * struct comedi_devinfo - used to retrieve information about a COMEDI device + * @version_code: COMEDI version code. + * @n_subdevs: Number of subdevices the device has. + * @driver_name: Null-terminated COMEDI driver name. + * @board_name: Null-terminated COMEDI board name. + * @read_subdevice: Index of the current "read" subdevice (%-1 if none). + * @write_subdevice: Index of the current "write" subdevice (%-1 if none). + * @unused: Reserved for future use. + * + * This is used with the %COMEDI_DEVINFO ioctl to get basic information about + * the device. + */ +struct comedi_devinfo { + unsigned int version_code; + unsigned int n_subdevs; + char driver_name[COMEDI_NAMELEN]; + char board_name[COMEDI_NAMELEN]; + int read_subdevice; + int write_subdevice; + int unused[30]; +}; + +/** + * struct comedi_devconfig - used to configure a legacy COMEDI device + * @board_name: Null-terminated string specifying the type of board + * to configure. + * @options: An array of integer configuration options. + * + * This is used with the %COMEDI_DEVCONFIG ioctl to configure a "legacy" COMEDI + * device, such as an ISA card. Not all COMEDI drivers support this. Those + * that do either expect the specified board name to match one of a list of + * names registered with the COMEDI core, or expect the specified board name + * to match the COMEDI driver name itself. The configuration options are + * handled in a driver-specific manner. + */ +struct comedi_devconfig { + char board_name[COMEDI_NAMELEN]; + int options[COMEDI_NDEVCONFOPTS]; +}; + +/** + * struct comedi_bufconfig - used to set or get buffer size for a subdevice + * @subdevice: Subdevice index. + * @flags: Not used. + * @maximum_size: Maximum allowed buffer size. + * @size: Buffer size. + * @unused: Reserved for future use. + * + * This is used with the %COMEDI_BUFCONFIG ioctl to get or configure the + * maximum buffer size and current buffer size for a COMEDI subdevice that + * supports asynchronous commands. If the subdevice does not support + * asynchronous commands, @maximum_size and @size are ignored and set to 0. + * + * On ioctl input, non-zero values of @maximum_size and @size specify a + * new maximum size and new current size (in bytes), respectively. These + * will by rounded up to a multiple of %PAGE_SIZE. Specifying a new maximum + * size requires admin capabilities. + * + * On ioctl output, @maximum_size and @size and set to the current maximum + * buffer size and current buffer size, respectively. + */ +struct comedi_bufconfig { + unsigned int subdevice; + unsigned int flags; + + unsigned int maximum_size; + unsigned int size; + + unsigned int unused[4]; +}; + +/** + * struct comedi_bufinfo - used to manipulate buffer position for a subdevice + * @subdevice: Subdevice index. + * @bytes_read: Specify amount to advance read position for an + * asynchronous command in the input ("read") direction. + * @buf_write_ptr: Current write position (index) within the buffer. + * @buf_read_ptr: Current read position (index) within the buffer. + * @buf_write_count: Total amount written, modulo 2^32. + * @buf_read_count: Total amount read, modulo 2^32. + * @bytes_written: Specify amount to advance write position for an + * asynchronous command in the output ("write") direction. + * @unused: Reserved for future use. + * + * This is used with the %COMEDI_BUFINFO ioctl to optionally advance the + * current read or write position in an asynchronous acquisition data buffer, + * and to get the current read and write positions in the buffer. + */ +struct comedi_bufinfo { + unsigned int subdevice; + unsigned int bytes_read; + + unsigned int buf_write_ptr; + unsigned int buf_read_ptr; + unsigned int buf_write_count; + unsigned int buf_read_count; + + unsigned int bytes_written; + + unsigned int unused[4]; +}; + +/* range stuff */ + +#define __RANGE(a, b) ((((a) & 0xffff) << 16) | ((b) & 0xffff)) + +#define RANGE_OFFSET(a) (((a) >> 16) & 0xffff) +#define RANGE_LENGTH(b) ((b) & 0xffff) + +#define RF_UNIT(flags) ((flags) & 0xff) +#define RF_EXTERNAL 0x100 + +#define UNIT_volt 0 +#define UNIT_mA 1 +#define UNIT_none 2 + +#define COMEDI_MIN_SPEED 0xffffffffu + +/**********************************************************/ +/* everything after this line is ALPHA */ +/**********************************************************/ + +/* + * 8254 specific configuration. + * + * It supports two config commands: + * + * 0 ID: INSN_CONFIG_SET_COUNTER_MODE + * 1 8254 Mode + * I8254_MODE0, I8254_MODE1, ..., I8254_MODE5 + * OR'ed with: + * I8254_BCD, I8254_BINARY + * + * 0 ID: INSN_CONFIG_8254_READ_STATUS + * 1 <-- Status byte returned here. + * B7 = Output + * B6 = NULL Count + * B5 - B0 Current mode. + */ + +enum i8254_mode { + I8254_MODE0 = (0 << 1), /* Interrupt on terminal count */ + I8254_MODE1 = (1 << 1), /* Hardware retriggerable one-shot */ + I8254_MODE2 = (2 << 1), /* Rate generator */ + I8254_MODE3 = (3 << 1), /* Square wave mode */ + I8254_MODE4 = (4 << 1), /* Software triggered strobe */ + /* Hardware triggered strobe (retriggerable) */ + I8254_MODE5 = (5 << 1), + /* Use binary-coded decimal instead of binary (pretty useless) */ + I8254_BCD = 1, + I8254_BINARY = 0 +}; + +/* *** BEGIN GLOBALLY-NAMED NI TERMINALS/SIGNALS *** */ + +/* + * Common National Instruments Terminal/Signal names. + * Some of these have no NI_ prefix as they are useful for non-NI hardware, such + * as those that utilize the PXI/RTSI trigger lines. + * + * NOTE ABOUT THE CHOICE OF NAMES HERE AND THE CAMELSCRIPT: + * The choice to use CamelScript and the exact names below is for + * maintainability, clarity, similarity to manufacturer's documentation, + * _and_ a mitigation for confusion that has plagued the use of these drivers + * for years! + * + * More detail: + * There have been significant confusions over the past many years for users + * when trying to understand how to connect to/from signals and terminals on + * NI hardware using comedi. The major reason for this is that the actual + * register values were exposed and required to be used by users. Several + * major reasons exist why this caused major confusion for users: + * 1) The register values are _NOT_ in user documentation, but rather in + * arcane locations, such as a few register programming manuals that are + * increasingly hard to find and the NI MHDDK (comments in example code). + * There is no one place to find the various valid values of the registers. + * 2) The register values are _NOT_ completely consistent. There is no way to + * gain any sense of intuition of which values, or even enums one should use + * for various registers. There was some attempt in prior use of comedi to + * name enums such that a user might know which enums should be used for + * varying purposes, but the end-user had to gain a knowledge of register + * values to correctly wield this approach. + * 3) The names for signals and registers found in the various register level + * programming manuals and vendor-provided documentation are _not_ even + * close to the same names that are in the end-user documentation. + * + * Similar, albeit less, confusion plagued NI's previous version of their own + * drivers. Earlier than 2003, NI greatly simplified the situation for users + * by releasing a new API that abstracted the names of signals/terminals to a + * common and intuitive set of names. + * + * The names below mirror the names chosen and well documented by NI. These + * names are exposed to the user via the comedilib user library. By keeping + * the names below, in spite of the use of CamelScript, maintenance will be + * greatly eased and confusion for users _and_ comedi developers will be + * greatly reduced. + */ + +/* + * Base of abstracted NI names. + * The first 16 bits of *_arg are reserved for channel selection. + * Since we only actually need the first 4 or 5 bits for all register values on + * NI select registers anyways, we'll identify all values >= (1<<15) as being an + * abstracted NI signal/terminal name. + * These values are also used/returned by INSN_DEVICE_CONFIG_TEST_ROUTE, + * INSN_DEVICE_CONFIG_CONNECT_ROUTE, INSN_DEVICE_CONFIG_DISCONNECT_ROUTE, + * and INSN_DEVICE_CONFIG_GET_ROUTES. + */ +#define NI_NAMES_BASE 0x8000u + +#define _TERM_N(base, n, x) ((base) + ((x) & ((n) - 1))) + +/* + * not necessarily all allowed 64 PFIs are valid--certainly not for all devices + */ +#define NI_PFI(x) _TERM_N(NI_NAMES_BASE, 64, x) +/* 8 trigger lines by standard, Some devices cannot talk to all eight. */ +#define TRIGGER_LINE(x) _TERM_N(NI_PFI(-1) + 1, 8, x) +/* 4 RTSI shared MUXes to route signals to/from TRIGGER_LINES on NI hardware */ +#define NI_RTSI_BRD(x) _TERM_N(TRIGGER_LINE(-1) + 1, 4, x) + +/* *** Counter/timer names : 8 counters max *** */ +#define NI_MAX_COUNTERS 8 +#define NI_COUNTER_NAMES_BASE (NI_RTSI_BRD(-1) + 1) +#define NI_CtrSource(x) _TERM_N(NI_COUNTER_NAMES_BASE, NI_MAX_COUNTERS, x) +/* Gate, Aux, A,B,Z are all treated, at times as gates */ +#define NI_GATES_NAMES_BASE (NI_CtrSource(-1) + 1) +#define NI_CtrGate(x) _TERM_N(NI_GATES_NAMES_BASE, NI_MAX_COUNTERS, x) +#define NI_CtrAux(x) _TERM_N(NI_CtrGate(-1) + 1, NI_MAX_COUNTERS, x) +#define NI_CtrA(x) _TERM_N(NI_CtrAux(-1) + 1, NI_MAX_COUNTERS, x) +#define NI_CtrB(x) _TERM_N(NI_CtrA(-1) + 1, NI_MAX_COUNTERS, x) +#define NI_CtrZ(x) _TERM_N(NI_CtrB(-1) + 1, NI_MAX_COUNTERS, x) +#define NI_GATES_NAMES_MAX NI_CtrZ(-1) +#define NI_CtrArmStartTrigger(x) _TERM_N(NI_CtrZ(-1) + 1, NI_MAX_COUNTERS, x) +#define NI_CtrInternalOutput(x) \ + _TERM_N(NI_CtrArmStartTrigger(-1) + 1, NI_MAX_COUNTERS, x) +/** external pin(s) labeled conveniently as CtrOut. */ +#define NI_CtrOut(x) _TERM_N(NI_CtrInternalOutput(-1) + 1, NI_MAX_COUNTERS, x) +/** For Buffered sampling of ctr -- x series capability. */ +#define NI_CtrSampleClock(x) _TERM_N(NI_CtrOut(-1) + 1, NI_MAX_COUNTERS, x) +#define NI_COUNTER_NAMES_MAX NI_CtrSampleClock(-1) + +enum ni_common_signal_names { + /* PXI_Star: this is a non-NI-specific signal */ + PXI_Star = NI_COUNTER_NAMES_MAX + 1, + PXI_Clk10, + PXIe_Clk100, + NI_AI_SampleClock, + NI_AI_SampleClockTimebase, + NI_AI_StartTrigger, + NI_AI_ReferenceTrigger, + NI_AI_ConvertClock, + NI_AI_ConvertClockTimebase, + NI_AI_PauseTrigger, + NI_AI_HoldCompleteEvent, + NI_AI_HoldComplete, + NI_AI_ExternalMUXClock, + NI_AI_STOP, /* pulse signal that occurs when a update is finished(?) */ + NI_AO_SampleClock, + NI_AO_SampleClockTimebase, + NI_AO_StartTrigger, + NI_AO_PauseTrigger, + NI_DI_SampleClock, + NI_DI_SampleClockTimebase, + NI_DI_StartTrigger, + NI_DI_ReferenceTrigger, + NI_DI_PauseTrigger, + NI_DI_InputBufferFull, + NI_DI_ReadyForStartEvent, + NI_DI_ReadyForTransferEventBurst, + NI_DI_ReadyForTransferEventPipelined, + NI_DO_SampleClock, + NI_DO_SampleClockTimebase, + NI_DO_StartTrigger, + NI_DO_PauseTrigger, + NI_DO_OutputBufferFull, + NI_DO_DataActiveEvent, + NI_DO_ReadyForStartEvent, + NI_DO_ReadyForTransferEvent, + NI_MasterTimebase, + NI_20MHzTimebase, + NI_80MHzTimebase, + NI_100MHzTimebase, + NI_200MHzTimebase, + NI_100kHzTimebase, + NI_10MHzRefClock, + NI_FrequencyOutput, + NI_ChangeDetectionEvent, + NI_AnalogComparisonEvent, + NI_WatchdogExpiredEvent, + NI_WatchdogExpirationTrigger, + NI_SCXI_Trig1, + NI_LogicLow, + NI_LogicHigh, + NI_ExternalStrobe, + NI_PFI_DO, + NI_CaseGround, + /* special internal signal used as variable source for RTSI bus: */ + NI_RGOUT0, + + /* just a name to make the next more convenient, regardless of above */ + _NI_NAMES_MAX_PLUS_1, + NI_NUM_NAMES = _NI_NAMES_MAX_PLUS_1 - NI_NAMES_BASE, +}; + +/* *** END GLOBALLY-NAMED NI TERMINALS/SIGNALS *** */ + +#define NI_USUAL_PFI_SELECT(x) (((x) < 10) ? (0x1 + (x)) : (0xb + (x))) +#define NI_USUAL_RTSI_SELECT(x) (((x) < 7) ? (0xb + (x)) : 0x1b) + +/* + * mode bits for NI general-purpose counters, set with + * INSN_CONFIG_SET_COUNTER_MODE + */ +#define NI_GPCT_COUNTING_MODE_SHIFT 16 +#define NI_GPCT_INDEX_PHASE_BITSHIFT 20 +#define NI_GPCT_COUNTING_DIRECTION_SHIFT 24 +enum ni_gpct_mode_bits { + NI_GPCT_GATE_ON_BOTH_EDGES_BIT = 0x4, + NI_GPCT_EDGE_GATE_MODE_MASK = 0x18, + NI_GPCT_EDGE_GATE_STARTS_STOPS_BITS = 0x0, + NI_GPCT_EDGE_GATE_STOPS_STARTS_BITS = 0x8, + NI_GPCT_EDGE_GATE_STARTS_BITS = 0x10, + NI_GPCT_EDGE_GATE_NO_STARTS_NO_STOPS_BITS = 0x18, + NI_GPCT_STOP_MODE_MASK = 0x60, + NI_GPCT_STOP_ON_GATE_BITS = 0x00, + NI_GPCT_STOP_ON_GATE_OR_TC_BITS = 0x20, + NI_GPCT_STOP_ON_GATE_OR_SECOND_TC_BITS = 0x40, + NI_GPCT_LOAD_B_SELECT_BIT = 0x80, + NI_GPCT_OUTPUT_MODE_MASK = 0x300, + NI_GPCT_OUTPUT_TC_PULSE_BITS = 0x100, + NI_GPCT_OUTPUT_TC_TOGGLE_BITS = 0x200, + NI_GPCT_OUTPUT_TC_OR_GATE_TOGGLE_BITS = 0x300, + NI_GPCT_HARDWARE_DISARM_MASK = 0xc00, + NI_GPCT_NO_HARDWARE_DISARM_BITS = 0x000, + NI_GPCT_DISARM_AT_TC_BITS = 0x400, + NI_GPCT_DISARM_AT_GATE_BITS = 0x800, + NI_GPCT_DISARM_AT_TC_OR_GATE_BITS = 0xc00, + NI_GPCT_LOADING_ON_TC_BIT = 0x1000, + NI_GPCT_LOADING_ON_GATE_BIT = 0x4000, + NI_GPCT_COUNTING_MODE_MASK = 0x7 << NI_GPCT_COUNTING_MODE_SHIFT, + NI_GPCT_COUNTING_MODE_NORMAL_BITS = + 0x0 << NI_GPCT_COUNTING_MODE_SHIFT, + NI_GPCT_COUNTING_MODE_QUADRATURE_X1_BITS = + 0x1 << NI_GPCT_COUNTING_MODE_SHIFT, + NI_GPCT_COUNTING_MODE_QUADRATURE_X2_BITS = + 0x2 << NI_GPCT_COUNTING_MODE_SHIFT, + NI_GPCT_COUNTING_MODE_QUADRATURE_X4_BITS = + 0x3 << NI_GPCT_COUNTING_MODE_SHIFT, + NI_GPCT_COUNTING_MODE_TWO_PULSE_BITS = + 0x4 << NI_GPCT_COUNTING_MODE_SHIFT, + NI_GPCT_COUNTING_MODE_SYNC_SOURCE_BITS = + 0x6 << NI_GPCT_COUNTING_MODE_SHIFT, + NI_GPCT_INDEX_PHASE_MASK = 0x3 << NI_GPCT_INDEX_PHASE_BITSHIFT, + NI_GPCT_INDEX_PHASE_LOW_A_LOW_B_BITS = + 0x0 << NI_GPCT_INDEX_PHASE_BITSHIFT, + NI_GPCT_INDEX_PHASE_LOW_A_HIGH_B_BITS = + 0x1 << NI_GPCT_INDEX_PHASE_BITSHIFT, + NI_GPCT_INDEX_PHASE_HIGH_A_LOW_B_BITS = + 0x2 << NI_GPCT_INDEX_PHASE_BITSHIFT, + NI_GPCT_INDEX_PHASE_HIGH_A_HIGH_B_BITS = + 0x3 << NI_GPCT_INDEX_PHASE_BITSHIFT, + NI_GPCT_INDEX_ENABLE_BIT = 0x400000, + NI_GPCT_COUNTING_DIRECTION_MASK = + 0x3 << NI_GPCT_COUNTING_DIRECTION_SHIFT, + NI_GPCT_COUNTING_DIRECTION_DOWN_BITS = + 0x00 << NI_GPCT_COUNTING_DIRECTION_SHIFT, + NI_GPCT_COUNTING_DIRECTION_UP_BITS = + 0x1 << NI_GPCT_COUNTING_DIRECTION_SHIFT, + NI_GPCT_COUNTING_DIRECTION_HW_UP_DOWN_BITS = + 0x2 << NI_GPCT_COUNTING_DIRECTION_SHIFT, + NI_GPCT_COUNTING_DIRECTION_HW_GATE_BITS = + 0x3 << NI_GPCT_COUNTING_DIRECTION_SHIFT, + NI_GPCT_RELOAD_SOURCE_MASK = 0xc000000, + NI_GPCT_RELOAD_SOURCE_FIXED_BITS = 0x0, + NI_GPCT_RELOAD_SOURCE_SWITCHING_BITS = 0x4000000, + NI_GPCT_RELOAD_SOURCE_GATE_SELECT_BITS = 0x8000000, + NI_GPCT_OR_GATE_BIT = 0x10000000, + NI_GPCT_INVERT_OUTPUT_BIT = 0x20000000 +}; + +/* + * Bits for setting a clock source with + * INSN_CONFIG_SET_CLOCK_SRC when using NI general-purpose counters. + */ +enum ni_gpct_clock_source_bits { + NI_GPCT_CLOCK_SRC_SELECT_MASK = 0x3f, + NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS = 0x0, + NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS = 0x1, + NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS = 0x2, + NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS = 0x3, + NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS = 0x4, + NI_GPCT_NEXT_TC_CLOCK_SRC_BITS = 0x5, + /* NI 660x-specific */ + NI_GPCT_SOURCE_PIN_i_CLOCK_SRC_BITS = 0x6, + NI_GPCT_PXI10_CLOCK_SRC_BITS = 0x7, + NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS = 0x8, + NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS = 0x9, + NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK = 0x30000000, + NI_GPCT_NO_PRESCALE_CLOCK_SRC_BITS = 0x0, + /* divide source by 2 */ + NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS = 0x10000000, + /* divide source by 8 */ + NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS = 0x20000000, + NI_GPCT_INVERT_CLOCK_SRC_BIT = 0x80000000 +}; + +/* NI 660x-specific */ +#define NI_GPCT_SOURCE_PIN_CLOCK_SRC_BITS(x) (0x10 + (x)) + +#define NI_GPCT_RTSI_CLOCK_SRC_BITS(x) (0x18 + (x)) + +/* no pfi on NI 660x */ +#define NI_GPCT_PFI_CLOCK_SRC_BITS(x) (0x20 + (x)) + +/* + * Possibilities for setting a gate source with + * INSN_CONFIG_SET_GATE_SRC when using NI general-purpose counters. + * May be bitwise-or'd with CR_EDGE or CR_INVERT. + */ +enum ni_gpct_gate_select { + /* m-series gates */ + NI_GPCT_TIMESTAMP_MUX_GATE_SELECT = 0x0, + NI_GPCT_AI_START2_GATE_SELECT = 0x12, + NI_GPCT_PXI_STAR_TRIGGER_GATE_SELECT = 0x13, + NI_GPCT_NEXT_OUT_GATE_SELECT = 0x14, + NI_GPCT_AI_START1_GATE_SELECT = 0x1c, + NI_GPCT_NEXT_SOURCE_GATE_SELECT = 0x1d, + NI_GPCT_ANALOG_TRIGGER_OUT_GATE_SELECT = 0x1e, + NI_GPCT_LOGIC_LOW_GATE_SELECT = 0x1f, + /* more gates for 660x */ + NI_GPCT_SOURCE_PIN_i_GATE_SELECT = 0x100, + NI_GPCT_GATE_PIN_i_GATE_SELECT = 0x101, + /* more gates for 660x "second gate" */ + NI_GPCT_UP_DOWN_PIN_i_GATE_SELECT = 0x201, + NI_GPCT_SELECTED_GATE_GATE_SELECT = 0x21e, + /* + * m-series "second gate" sources are unknown, + * we should add them here with an offset of 0x300 when + * known. + */ + NI_GPCT_DISABLED_GATE_SELECT = 0x8000, +}; + +#define NI_GPCT_GATE_PIN_GATE_SELECT(x) (0x102 + (x)) +#define NI_GPCT_RTSI_GATE_SELECT(x) NI_USUAL_RTSI_SELECT(x) +#define NI_GPCT_PFI_GATE_SELECT(x) NI_USUAL_PFI_SELECT(x) +#define NI_GPCT_UP_DOWN_PIN_GATE_SELECT(x) (0x202 + (x)) + +/* + * Possibilities for setting a source with + * INSN_CONFIG_SET_OTHER_SRC when using NI general-purpose counters. + */ +enum ni_gpct_other_index { + NI_GPCT_SOURCE_ENCODER_A, + NI_GPCT_SOURCE_ENCODER_B, + NI_GPCT_SOURCE_ENCODER_Z +}; + +enum ni_gpct_other_select { + /* m-series gates */ + /* Still unknown, probably only need NI_GPCT_PFI_OTHER_SELECT */ + NI_GPCT_DISABLED_OTHER_SELECT = 0x8000, +}; + +#define NI_GPCT_PFI_OTHER_SELECT(x) NI_USUAL_PFI_SELECT(x) + +/* + * start sources for ni general-purpose counters for use with + * INSN_CONFIG_ARM + */ +enum ni_gpct_arm_source { + NI_GPCT_ARM_IMMEDIATE = 0x0, + /* + * Start both the counter and the adjacent paired counter simultaneously + */ + NI_GPCT_ARM_PAIRED_IMMEDIATE = 0x1, + /* + * If the NI_GPCT_HW_ARM bit is set, we will pass the least significant + * bits (3 bits for 660x or 5 bits for m-series) through to the + * hardware. To select a hardware trigger, pass the appropriate select + * bit, e.g., + * NI_GPCT_HW_ARM | NI_GPCT_AI_START1_GATE_SELECT or + * NI_GPCT_HW_ARM | NI_GPCT_PFI_GATE_SELECT(pfi_number) + */ + NI_GPCT_HW_ARM = 0x1000, + NI_GPCT_ARM_UNKNOWN = NI_GPCT_HW_ARM, /* for backward compatibility */ +}; + +/* digital filtering options for ni 660x for use with INSN_CONFIG_FILTER. */ +enum ni_gpct_filter_select { + NI_GPCT_FILTER_OFF = 0x0, + NI_GPCT_FILTER_TIMEBASE_3_SYNC = 0x1, + NI_GPCT_FILTER_100x_TIMEBASE_1 = 0x2, + NI_GPCT_FILTER_20x_TIMEBASE_1 = 0x3, + NI_GPCT_FILTER_10x_TIMEBASE_1 = 0x4, + NI_GPCT_FILTER_2x_TIMEBASE_1 = 0x5, + NI_GPCT_FILTER_2x_TIMEBASE_3 = 0x6 +}; + +/* + * PFI digital filtering options for ni m-series for use with + * INSN_CONFIG_FILTER. + */ +enum ni_pfi_filter_select { + NI_PFI_FILTER_OFF = 0x0, + NI_PFI_FILTER_125ns = 0x1, + NI_PFI_FILTER_6425ns = 0x2, + NI_PFI_FILTER_2550us = 0x3 +}; + +/* master clock sources for ni mio boards and INSN_CONFIG_SET_CLOCK_SRC */ +enum ni_mio_clock_source { + NI_MIO_INTERNAL_CLOCK = 0, + /* + * Doesn't work for m-series, use NI_MIO_PLL_RTSI_CLOCK() + * the NI_MIO_PLL_* sources are m-series only + */ + NI_MIO_RTSI_CLOCK = 1, + NI_MIO_PLL_PXI_STAR_TRIGGER_CLOCK = 2, + NI_MIO_PLL_PXI10_CLOCK = 3, + NI_MIO_PLL_RTSI0_CLOCK = 4 +}; + +#define NI_MIO_PLL_RTSI_CLOCK(x) (NI_MIO_PLL_RTSI0_CLOCK + (x)) + +/* + * Signals which can be routed to an NI RTSI pin with INSN_CONFIG_SET_ROUTING. + * The numbers assigned are not arbitrary, they correspond to the bits required + * to program the board. + */ +enum ni_rtsi_routing { + NI_RTSI_OUTPUT_ADR_START1 = 0, + NI_RTSI_OUTPUT_ADR_START2 = 1, + NI_RTSI_OUTPUT_SCLKG = 2, + NI_RTSI_OUTPUT_DACUPDN = 3, + NI_RTSI_OUTPUT_DA_START1 = 4, + NI_RTSI_OUTPUT_G_SRC0 = 5, + NI_RTSI_OUTPUT_G_GATE0 = 6, + NI_RTSI_OUTPUT_RGOUT0 = 7, + NI_RTSI_OUTPUT_RTSI_BRD_0 = 8, + /* Pre-m-series always have RTSI clock on line 7 */ + NI_RTSI_OUTPUT_RTSI_OSC = 12 +}; + +#define NI_RTSI_OUTPUT_RTSI_BRD(x) (NI_RTSI_OUTPUT_RTSI_BRD_0 + (x)) + +/* + * Signals which can be routed to an NI PFI pin on an m-series board with + * INSN_CONFIG_SET_ROUTING. These numbers are also returned by + * INSN_CONFIG_GET_ROUTING on pre-m-series boards, even though their routing + * cannot be changed. The numbers assigned are not arbitrary, they correspond + * to the bits required to program the board. + */ +enum ni_pfi_routing { + NI_PFI_OUTPUT_PFI_DEFAULT = 0, + NI_PFI_OUTPUT_AI_START1 = 1, + NI_PFI_OUTPUT_AI_START2 = 2, + NI_PFI_OUTPUT_AI_CONVERT = 3, + NI_PFI_OUTPUT_G_SRC1 = 4, + NI_PFI_OUTPUT_G_GATE1 = 5, + NI_PFI_OUTPUT_AO_UPDATE_N = 6, + NI_PFI_OUTPUT_AO_START1 = 7, + NI_PFI_OUTPUT_AI_START_PULSE = 8, + NI_PFI_OUTPUT_G_SRC0 = 9, + NI_PFI_OUTPUT_G_GATE0 = 10, + NI_PFI_OUTPUT_EXT_STROBE = 11, + NI_PFI_OUTPUT_AI_EXT_MUX_CLK = 12, + NI_PFI_OUTPUT_GOUT0 = 13, + NI_PFI_OUTPUT_GOUT1 = 14, + NI_PFI_OUTPUT_FREQ_OUT = 15, + NI_PFI_OUTPUT_PFI_DO = 16, + NI_PFI_OUTPUT_I_ATRIG = 17, + NI_PFI_OUTPUT_RTSI0 = 18, + NI_PFI_OUTPUT_PXI_STAR_TRIGGER_IN = 26, + NI_PFI_OUTPUT_SCXI_TRIG1 = 27, + NI_PFI_OUTPUT_DIO_CHANGE_DETECT_RTSI = 28, + NI_PFI_OUTPUT_CDI_SAMPLE = 29, + NI_PFI_OUTPUT_CDO_UPDATE = 30 +}; + +#define NI_PFI_OUTPUT_RTSI(x) (NI_PFI_OUTPUT_RTSI0 + (x)) + +/* + * Signals which can be routed to output on a NI PFI pin on a 660x board + * with INSN_CONFIG_SET_ROUTING. The numbers assigned are + * not arbitrary, they correspond to the bits required + * to program the board. Lines 0 to 7 can only be set to + * NI_660X_PFI_OUTPUT_DIO. Lines 32 to 39 can only be set to + * NI_660X_PFI_OUTPUT_COUNTER. + */ +enum ni_660x_pfi_routing { + NI_660X_PFI_OUTPUT_COUNTER = 1, /* counter */ + NI_660X_PFI_OUTPUT_DIO = 2, /* static digital output */ +}; + +/* + * NI External Trigger lines. These values are not arbitrary, but are related + * to the bits required to program the board (offset by 1 for historical + * reasons). + */ +#define NI_EXT_PFI(x) (NI_USUAL_PFI_SELECT(x) - 1) +#define NI_EXT_RTSI(x) (NI_USUAL_RTSI_SELECT(x) - 1) + +/* + * Clock sources for CDIO subdevice on NI m-series boards. Used as the + * scan_begin_arg for a comedi_command. These sources may also be bitwise-or'd + * with CR_INVERT to change polarity. + */ +enum ni_m_series_cdio_scan_begin_src { + NI_CDIO_SCAN_BEGIN_SRC_GROUND = 0, + NI_CDIO_SCAN_BEGIN_SRC_AI_START = 18, + NI_CDIO_SCAN_BEGIN_SRC_AI_CONVERT = 19, + NI_CDIO_SCAN_BEGIN_SRC_PXI_STAR_TRIGGER = 20, + NI_CDIO_SCAN_BEGIN_SRC_G0_OUT = 28, + NI_CDIO_SCAN_BEGIN_SRC_G1_OUT = 29, + NI_CDIO_SCAN_BEGIN_SRC_ANALOG_TRIGGER = 30, + NI_CDIO_SCAN_BEGIN_SRC_AO_UPDATE = 31, + NI_CDIO_SCAN_BEGIN_SRC_FREQ_OUT = 32, + NI_CDIO_SCAN_BEGIN_SRC_DIO_CHANGE_DETECT_IRQ = 33 +}; + +#define NI_CDIO_SCAN_BEGIN_SRC_PFI(x) NI_USUAL_PFI_SELECT(x) +#define NI_CDIO_SCAN_BEGIN_SRC_RTSI(x) NI_USUAL_RTSI_SELECT(x) + +/* + * scan_begin_src for scan_begin_arg==TRIG_EXT with analog output command on NI + * boards. These scan begin sources can also be bitwise-or'd with CR_INVERT to + * change polarity. + */ +#define NI_AO_SCAN_BEGIN_SRC_PFI(x) NI_USUAL_PFI_SELECT(x) +#define NI_AO_SCAN_BEGIN_SRC_RTSI(x) NI_USUAL_RTSI_SELECT(x) + +/* + * Bits for setting a clock source with + * INSN_CONFIG_SET_CLOCK_SRC when using NI frequency output subdevice. + */ +enum ni_freq_out_clock_source_bits { + NI_FREQ_OUT_TIMEBASE_1_DIV_2_CLOCK_SRC, /* 10 MHz */ + NI_FREQ_OUT_TIMEBASE_2_CLOCK_SRC /* 100 KHz */ +}; + +/* + * Values for setting a clock source with INSN_CONFIG_SET_CLOCK_SRC for + * 8254 counter subdevices on Amplicon DIO boards (amplc_dio200 driver). + */ +enum amplc_dio_clock_source { + /* + * Per channel external clock + * input/output pin (pin is only an + * input when clock source set to this value, + * otherwise it is an output) + */ + AMPLC_DIO_CLK_CLKN, + AMPLC_DIO_CLK_10MHZ, /* 10 MHz internal clock */ + AMPLC_DIO_CLK_1MHZ, /* 1 MHz internal clock */ + AMPLC_DIO_CLK_100KHZ, /* 100 kHz internal clock */ + AMPLC_DIO_CLK_10KHZ, /* 10 kHz internal clock */ + AMPLC_DIO_CLK_1KHZ, /* 1 kHz internal clock */ + /* + * Output of preceding counter channel + * (for channel 0, preceding counter + * channel is channel 2 on preceding + * counter subdevice, for first counter + * subdevice, preceding counter + * subdevice is the last counter + * subdevice) + */ + AMPLC_DIO_CLK_OUTNM1, + AMPLC_DIO_CLK_EXT, /* per chip external input pin */ + /* the following are "enhanced" clock sources for PCIe models */ + AMPLC_DIO_CLK_VCC, /* clock input HIGH */ + AMPLC_DIO_CLK_GND, /* clock input LOW */ + AMPLC_DIO_CLK_PAT_PRESENT, /* "pattern present" signal */ + AMPLC_DIO_CLK_20MHZ /* 20 MHz internal clock */ +}; + +/* + * Values for setting a clock source with INSN_CONFIG_SET_CLOCK_SRC for + * timer subdevice on some Amplicon DIO PCIe boards (amplc_dio200 driver). + */ +enum amplc_dio_ts_clock_src { + AMPLC_DIO_TS_CLK_1GHZ, /* 1 ns period with 20 ns granularity */ + AMPLC_DIO_TS_CLK_1MHZ, /* 1 us period */ + AMPLC_DIO_TS_CLK_1KHZ /* 1 ms period */ +}; + +/* + * Values for setting a gate source with INSN_CONFIG_SET_GATE_SRC for + * 8254 counter subdevices on Amplicon DIO boards (amplc_dio200 driver). + */ +enum amplc_dio_gate_source { + AMPLC_DIO_GAT_VCC, /* internal high logic level */ + AMPLC_DIO_GAT_GND, /* internal low logic level */ + AMPLC_DIO_GAT_GATN, /* per channel external gate input */ + /* + * negated output of counter channel minus 2 + * (for channels 0 or 1, channel minus 2 is channel 1 or 2 on + * the preceding counter subdevice, for the first counter subdevice + * the preceding counter subdevice is the last counter subdevice) + */ + AMPLC_DIO_GAT_NOUTNM2, + AMPLC_DIO_GAT_RESERVED4, + AMPLC_DIO_GAT_RESERVED5, + AMPLC_DIO_GAT_RESERVED6, + AMPLC_DIO_GAT_RESERVED7, + /* the following are "enhanced" gate sources for PCIe models */ + AMPLC_DIO_GAT_NGATN = 6, /* negated per channel gate input */ + /* non-negated output of counter channel minus 2 */ + AMPLC_DIO_GAT_OUTNM2, + AMPLC_DIO_GAT_PAT_PRESENT, /* "pattern present" signal */ + AMPLC_DIO_GAT_PAT_OCCURRED, /* "pattern occurred" latched */ + AMPLC_DIO_GAT_PAT_GONE, /* "pattern gone away" latched */ + AMPLC_DIO_GAT_NPAT_PRESENT, /* negated "pattern present" */ + AMPLC_DIO_GAT_NPAT_OCCURRED, /* negated "pattern occurred" */ + AMPLC_DIO_GAT_NPAT_GONE /* negated "pattern gone away" */ +}; + +/* + * Values for setting a clock source with INSN_CONFIG_SET_CLOCK_SRC for + * the counter subdevice on the Kolter Electronic PCI-Counter board + * (ke_counter driver). + */ +enum ke_counter_clock_source { + KE_CLK_20MHZ, /* internal 20MHz (default) */ + KE_CLK_4MHZ, /* internal 4MHz (option) */ + KE_CLK_EXT /* external clock on pin 21 of D-Sub */ +}; + +#endif /* _COMEDI_H */ -- cgit v1.2.3 From 631e272b12075b60f7c7fc4f84f937d78a699844 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 17 Nov 2021 12:06:01 +0000 Subject: comedi: Move and rename "8255.h" to Some of the header files in "drivers/comedi/drivers/" are common enough to be useful to out-of-tree comedi driver modules. Using them for out-of-tree module builds is hampered by the headers being outside the "include/" directory so it is desirable to move them. There are about a couple of dozen Comedi device drivers that use the "comedi_8255" module to add digital I/O subdevices based on the venerable 8255 Programmable Peripheral Interface chip. The macros and declarations to use that module are in the "8255.h" header file in the comedi "drivers" directory. Move it into "include/linux/comedi/" and rename it to "comedi_8255.h" for naming consistency reasons. Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20211117120604.117740-4-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedi_8255.h | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/comedi/comedi_8255.h (limited to 'include') diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h new file mode 100644 index 000000000000..b2a5bc6b3a49 --- /dev/null +++ b/include/linux/comedi/comedi_8255.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedi_8255.h + * Generic 8255 digital I/O subdevice support + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1998 David A. Schleef + */ + +#ifndef _COMEDI_8255_H +#define _COMEDI_8255_H + +#define I8255_SIZE 0x04 + +#define I8255_DATA_A_REG 0x00 +#define I8255_DATA_B_REG 0x01 +#define I8255_DATA_C_REG 0x02 +#define I8255_CTRL_REG 0x03 +#define I8255_CTRL_C_LO_IO BIT(0) +#define I8255_CTRL_B_IO BIT(1) +#define I8255_CTRL_B_MODE BIT(2) +#define I8255_CTRL_C_HI_IO BIT(3) +#define I8255_CTRL_A_IO BIT(4) +#define I8255_CTRL_A_MODE(x) ((x) << 5) +#define I8255_CTRL_CW BIT(7) + +struct comedi_device; +struct comedi_subdevice; + +int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s, + int (*io)(struct comedi_device *dev, int dir, int port, + int data, unsigned long regbase), + unsigned long regbase); + +int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, + int (*io)(struct comedi_device *dev, int dir, int port, + int data, unsigned long regbase), + unsigned long regbase); + +unsigned long subdev_8255_regbase(struct comedi_subdevice *s); + +#endif -- cgit v1.2.3 From 44fb7affcfa4e968e9c2ede023ef0e15f06d8209 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 17 Nov 2021 12:06:02 +0000 Subject: comedi: Move "comedi_8254.h" to Some of the header files in "drivers/comedi/drivers/" are common enough to be useful to out-of-tree comedi driver modules. Using them for out-of-tree module builds is hampered by the headers being outside the "include/" directory so it is desirable to move them. There are about a couple of dozen or so Comedi device drivers that use the "comedi_8254" module to add timers based on the venerable 8254 Programmable Interval Timer chip. The macros and declarations to use that module are in the "comedi_8254.h" header file in the comedi "drivers" directory. Move it into "include/linux/comedi/". Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20211117120604.117740-5-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedi_8254.h | 134 +++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 include/linux/comedi/comedi_8254.h (limited to 'include') diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h new file mode 100644 index 000000000000..d8264417e53c --- /dev/null +++ b/include/linux/comedi/comedi_8254.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedi_8254.h + * Generic 8254 timer/counter support + * Copyright (C) 2014 H Hartley Sweeten + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 2000 David A. Schleef + */ + +#ifndef _COMEDI_8254_H +#define _COMEDI_8254_H + +#include + +struct comedi_device; +struct comedi_insn; +struct comedi_subdevice; + +/* + * Common oscillator base values in nanoseconds + */ +#define I8254_OSC_BASE_10MHZ 100 +#define I8254_OSC_BASE_5MHZ 200 +#define I8254_OSC_BASE_4MHZ 250 +#define I8254_OSC_BASE_2MHZ 500 +#define I8254_OSC_BASE_1MHZ 1000 +#define I8254_OSC_BASE_100KHZ 10000 +#define I8254_OSC_BASE_10KHZ 100000 +#define I8254_OSC_BASE_1KHZ 1000000 + +/* + * I/O access size used to read/write registers + */ +#define I8254_IO8 1 +#define I8254_IO16 2 +#define I8254_IO32 4 + +/* + * Register map for generic 8254 timer (I8254_IO8 with 0 regshift) + */ +#define I8254_COUNTER0_REG 0x00 +#define I8254_COUNTER1_REG 0x01 +#define I8254_COUNTER2_REG 0x02 +#define I8254_CTRL_REG 0x03 +#define I8254_CTRL_SEL_CTR(x) ((x) << 6) +#define I8254_CTRL_READBACK(x) (I8254_CTRL_SEL_CTR(3) | BIT(x)) +#define I8254_CTRL_READBACK_COUNT I8254_CTRL_READBACK(4) +#define I8254_CTRL_READBACK_STATUS I8254_CTRL_READBACK(5) +#define I8254_CTRL_READBACK_SEL_CTR(x) (2 << (x)) +#define I8254_CTRL_RW(x) (((x) & 0x3) << 4) +#define I8254_CTRL_LATCH I8254_CTRL_RW(0) +#define I8254_CTRL_LSB_ONLY I8254_CTRL_RW(1) +#define I8254_CTRL_MSB_ONLY I8254_CTRL_RW(2) +#define I8254_CTRL_LSB_MSB I8254_CTRL_RW(3) + +/* counter maps zero to 0x10000 */ +#define I8254_MAX_COUNT 0x10000 + +/** + * struct comedi_8254 - private data used by this module + * @iobase: PIO base address of the registers (in/out) + * @mmio: MMIO base address of the registers (read/write) + * @iosize: I/O size used to access the registers (b/w/l) + * @regshift: register gap shift + * @osc_base: cascaded oscillator speed in ns + * @divisor: divisor for single counter + * @divisor1: divisor loaded into first cascaded counter + * @divisor2: divisor loaded into second cascaded counter + * #next_div: next divisor for single counter + * @next_div1: next divisor to use for first cascaded counter + * @next_div2: next divisor to use for second cascaded counter + * @clock_src; current clock source for each counter (driver specific) + * @gate_src; current gate source for each counter (driver specific) + * @busy: flags used to indicate that a counter is "busy" + * @insn_config: driver specific (*insn_config) callback + */ +struct comedi_8254 { + unsigned long iobase; + void __iomem *mmio; + unsigned int iosize; + unsigned int regshift; + unsigned int osc_base; + unsigned int divisor; + unsigned int divisor1; + unsigned int divisor2; + unsigned int next_div; + unsigned int next_div1; + unsigned int next_div2; + unsigned int clock_src[3]; + unsigned int gate_src[3]; + bool busy[3]; + + int (*insn_config)(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); +}; + +unsigned int comedi_8254_status(struct comedi_8254 *i8254, + unsigned int counter); +unsigned int comedi_8254_read(struct comedi_8254 *i8254, unsigned int counter); +void comedi_8254_write(struct comedi_8254 *i8254, + unsigned int counter, unsigned int val); + +int comedi_8254_set_mode(struct comedi_8254 *i8254, + unsigned int counter, unsigned int mode); +int comedi_8254_load(struct comedi_8254 *i8254, + unsigned int counter, unsigned int val, unsigned int mode); + +void comedi_8254_pacer_enable(struct comedi_8254 *i8254, + unsigned int counter1, unsigned int counter2, + bool enable); +void comedi_8254_update_divisors(struct comedi_8254 *i8254); +void comedi_8254_cascade_ns_to_timer(struct comedi_8254 *i8254, + unsigned int *nanosec, unsigned int flags); +void comedi_8254_ns_to_timer(struct comedi_8254 *i8254, + unsigned int *nanosec, unsigned int flags); + +void comedi_8254_set_busy(struct comedi_8254 *i8254, + unsigned int counter, bool busy); + +void comedi_8254_subdevice_init(struct comedi_subdevice *s, + struct comedi_8254 *i8254); + +struct comedi_8254 *comedi_8254_init(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); +struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); + +#endif /* _COMEDI_8254_H */ -- cgit v1.2.3 From fe7a4f5b9548456246ffda143bab59922acda9fd Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 17 Nov 2021 12:06:03 +0000 Subject: comedi: Move "comedi_isadma.h" to Some of the header files in "drivers/comedi/drivers/" are common enough to be useful to out-of-tree comedi driver modules. Using them for out-of-tree module builds is hampered by the headers being outside the "include/" directory so it is desirable to move them. There are about a half a dozen or so Comedi device drivers that use the "comedi_isadma" module to add ISA DMA support. The macros and declarations to use that module are in the "comedi_isadma.h" header file in the comedi "drivers" directory. Move it into "include/linux/comedi/". Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20211117120604.117740-6-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedi_isadma.h | 114 +++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 include/linux/comedi/comedi_isadma.h (limited to 'include') diff --git a/include/linux/comedi/comedi_isadma.h b/include/linux/comedi/comedi_isadma.h new file mode 100644 index 000000000000..9d2b12db7e6e --- /dev/null +++ b/include/linux/comedi/comedi_isadma.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * COMEDI ISA DMA support functions + * Copyright (c) 2014 H Hartley Sweeten + */ + +#ifndef _COMEDI_ISADMA_H +#define _COMEDI_ISADMA_H + +#include + +struct comedi_device; +struct device; + +/* + * These are used to avoid issues when and the DMA_MODE_ + * defines are not available. + */ +#define COMEDI_ISADMA_READ 0 +#define COMEDI_ISADMA_WRITE 1 + +/** + * struct comedi_isadma_desc - cookie for ISA DMA + * @virt_addr: virtual address of buffer + * @hw_addr: hardware (bus) address of buffer + * @chan: DMA channel + * @maxsize: allocated size of buffer (in bytes) + * @size: transfer size (in bytes) + * @mode: DMA_MODE_READ or DMA_MODE_WRITE + */ +struct comedi_isadma_desc { + void *virt_addr; + dma_addr_t hw_addr; + unsigned int chan; + unsigned int maxsize; + unsigned int size; + char mode; +}; + +/** + * struct comedi_isadma - ISA DMA data + * @dev: device to allocate non-coherent memory for + * @desc: cookie for each DMA buffer + * @n_desc: the number of cookies + * @cur_dma: the current cookie in use + * @chan: the first DMA channel requested + * @chan2: the second DMA channel requested + */ +struct comedi_isadma { + struct device *dev; + struct comedi_isadma_desc *desc; + int n_desc; + int cur_dma; + unsigned int chan; + unsigned int chan2; +}; + +#if IS_ENABLED(CONFIG_ISA_DMA_API) + +void comedi_isadma_program(struct comedi_isadma_desc *desc); +unsigned int comedi_isadma_disable(unsigned int dma_chan); +unsigned int comedi_isadma_disable_on_sample(unsigned int dma_chan, + unsigned int size); +unsigned int comedi_isadma_poll(struct comedi_isadma *dma); +void comedi_isadma_set_mode(struct comedi_isadma_desc *desc, char dma_dir); + +struct comedi_isadma *comedi_isadma_alloc(struct comedi_device *dev, + int n_desc, unsigned int dma_chan1, + unsigned int dma_chan2, + unsigned int maxsize, char dma_dir); +void comedi_isadma_free(struct comedi_isadma *dma); + +#else /* !IS_ENABLED(CONFIG_ISA_DMA_API) */ + +static inline void comedi_isadma_program(struct comedi_isadma_desc *desc) +{ +} + +static inline unsigned int comedi_isadma_disable(unsigned int dma_chan) +{ + return 0; +} + +static inline unsigned int +comedi_isadma_disable_on_sample(unsigned int dma_chan, unsigned int size) +{ + return 0; +} + +static inline unsigned int comedi_isadma_poll(struct comedi_isadma *dma) +{ + return 0; +} + +static inline void comedi_isadma_set_mode(struct comedi_isadma_desc *desc, + char dma_dir) +{ +} + +static inline struct comedi_isadma * +comedi_isadma_alloc(struct comedi_device *dev, int n_desc, + unsigned int dma_chan1, unsigned int dma_chan2, + unsigned int maxsize, char dma_dir) +{ + return NULL; +} + +static inline void comedi_isadma_free(struct comedi_isadma *dma) +{ +} + +#endif /* !IS_ENABLED(CONFIG_ISA_DMA_API) */ + +#endif /* #ifndef _COMEDI_ISADMA_H */ -- cgit v1.2.3 From 2cca3465147d650be3de04927a99784b30251ade Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Fri, 12 Nov 2021 08:28:09 +0200 Subject: mei: bus: add client dma interface Expose the client dma mapping via mei client bus interface. The client dma has to be mapped before the device is enabled, therefore we need to create device linking already during mapping and we need to unmap after the client is disable hence we need to postpone the unlink and flush till unmapping or when destroying the device. Signed-off-by: Alexander Usyskin Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Signed-off-by: Emmanuel Grumbach Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210420172755.12178-1-emmanuel.grumbach@intel.com Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211112062814.7502-1-emmanuel.grumbach@intel.com --- include/linux/mei_cl_bus.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index c6786c12b207..df1fab44ea5c 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -117,4 +117,7 @@ int mei_cldev_enable(struct mei_cl_device *cldev); int mei_cldev_disable(struct mei_cl_device *cldev); bool mei_cldev_enabled(const struct mei_cl_device *cldev); +void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size); +int mei_cldev_dma_unmap(struct mei_cl_device *cldev); + #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit v1.2.3 From 95706be13b9f755d93b5b82bdc782af439f1ec22 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 26 Nov 2021 19:28:42 +0200 Subject: net: mscc: ocelot: create a function that replaces an existing VCAP filter VCAP (Versatile Content Aware Processor) is the TCAM-based engine behind tc flower offload on ocelot, among other things. The ingress port mask on which VCAP rules match is present as a bit field in the actual key of the rule. This means that it is possible for a rule to be shared among multiple source ports. When the rule is added one by one on each desired port, that the ingress port mask of the key must be edited and rewritten to hardware. But the API in ocelot_vcap.c does not allow for this. For one thing, ocelot_vcap_filter_add() and ocelot_vcap_filter_del() are not symmetric, because ocelot_vcap_filter_add() works with a preallocated and prepopulated filter and programs it to hardware, and ocelot_vcap_filter_del() does both the job of removing the specified filter from hardware, as well as kfreeing it. That is to say, the only option of editing a filter in place, which is to delete it, modify the structure and add it back, does not work because it results in use-after-free. This patch introduces ocelot_vcap_filter_replace, which trivially reprograms a VCAP entry to hardware, at the exact same index at which it existed before, without modifying any list or allocating any memory. Signed-off-by: Vladimir Oltean Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- include/soc/mscc/ocelot_vcap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index eeb1142aa1b1..4d1dfa1136b2 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -703,6 +703,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, struct netlink_ext_ack *extack); int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); +int ocelot_vcap_filter_replace(struct ocelot *ocelot, + struct ocelot_vcap_filter *filter); struct ocelot_vcap_filter * ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, unsigned long cookie, bool tc_offload); -- cgit v1.2.3 From ec15baec3272bbec576f2ce7ce47765a8e9b7b1c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 26 Nov 2021 19:28:43 +0200 Subject: net: ptp: add a definition for the UDP port for IEEE 1588 general messages As opposed to event messages (Sync, PdelayReq etc) which require timestamping, general messages (Announce, FollowUp etc) do not. In PTP they are part of different streams of data. IEEE 1588-2008 Annex D.2 "UDP port numbers" states that the UDP destination port assigned by IANA is 319 for event messages, and 320 for general messages. Yet the kernel seems to be missing the definition for general messages. This patch adds it. Signed-off-by: Vladimir Oltean Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- include/linux/ptp_classify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index ae04968a3a47..9afd34a2d36c 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -37,6 +37,7 @@ #define PTP_MSGTYPE_PDELAY_RESP 0x3 #define PTP_EV_PORT 319 +#define PTP_GEN_PORT 320 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ -- cgit v1.2.3 From 4e0d84634445ed550498d613a49ea8f6cfa5e66c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Nov 2021 10:58:40 +0100 Subject: futex: Fix sparc32/m68k/nds32 build regression The recent futex cleanup series, botched up a rename of some function names, breaking sparc32, m68k and nds32: include/asm-generic/futex.h:17:2: error: implicit declaration of function 'futex_atomic_cmpxchg_inatomic_local_generic'; did you mean 'futex_atomic_cmpxchg_inatomic_local'? [-Werror=implicit-function-declaration] Fix the macros to point to the correct functions. Fixes: 3f2bedabb62c ("futex: Ensure futex_atomic_cmpxchg_inatomic() is present") Reported-by: Stephen Rothwell Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20211126095852.455492-1-arnd@kernel.org --- include/asm-generic/futex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index 30e7fa63b5df..66d6843bfd02 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -14,9 +14,9 @@ * */ #define futex_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval) \ - futex_atomic_cmpxchg_inatomic_local_generic(uval, uaddr, oldval, newval) + futex_atomic_cmpxchg_inatomic_local(uval, uaddr, oldval, newval) #define arch_futex_atomic_op_inuser(op, oparg, oval, uaddr) \ - arch_futex_atomic_op_inuser_local_generic(op, oparg, oval, uaddr) + futex_atomic_op_inuser_local(op, oparg, oval, uaddr) #endif /* CONFIG_SMP */ #endif -- cgit v1.2.3 From e6b4b873896f0e9298f70d25726f4bb1e1b265ba Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 24 Nov 2021 11:14:29 +0900 Subject: af_unix: Save hash in sk_hash. To replace unix_table_lock with per-hash locks in the next patch, we need to save a hash in each socket because /proc/net/unix or BPF prog iterate sockets while holding a hash table lock and release it later in a different function. Currently, we store a real/pseudo hash in struct unix_address. However, we do not allocate it to unbound sockets, nor should we do just for that. For this purpose, we can use sk_hash. Then, we no longer use the hash field in struct unix_address and can remove it. Also, this patch does - rename unix_insert_socket() to unix_insert_unbound_socket() - remove the redundant list argument from __unix_insert_socket() and unix_insert_unbound_socket() - use 'unsigned int' instead of 'unsigned' in __unix_set_addr_hash() - remove 'inline' from unix_remove_socket() and unix_insert_unbound_socket(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 7d142e8a0550..89049cc6c066 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -26,7 +26,6 @@ extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { refcount_t refcnt; int len; - unsigned int hash; struct sockaddr_un name[]; }; -- cgit v1.2.3 From afd20b9290e184c203fe22f2d6b80dc7127ba724 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 24 Nov 2021 11:14:30 +0900 Subject: af_unix: Replace the big lock with small locks. The hash table of AF_UNIX sockets is protected by the single lock. This patch replaces it with per-hash locks. The effect is noticeable when we handle multiple sockets simultaneously. Here is a test result on an EC2 c5.24xlarge instance. It shows latency (under 10us only) in unix_insert_unbound_socket() while 64 CPUs creating 1024 sockets for each in parallel. Without this patch: nsec : count distribution 0 : 179 | | 500 : 3021 |********* | 1000 : 6271 |******************* | 1500 : 6318 |******************* | 2000 : 5828 |***************** | 2500 : 5124 |*************** | 3000 : 4426 |************* | 3500 : 3672 |*********** | 4000 : 3138 |********* | 4500 : 2811 |******** | 5000 : 2384 |******* | 5500 : 2023 |****** | 6000 : 1954 |***** | 6500 : 1737 |***** | 7000 : 1749 |***** | 7500 : 1520 |**** | 8000 : 1469 |**** | 8500 : 1394 |**** | 9000 : 1232 |*** | 9500 : 1138 |*** | 10000 : 994 |*** | With this patch: nsec : count distribution 0 : 1634 |**** | 500 : 13170 |****************************************| 1000 : 13156 |*************************************** | 1500 : 9010 |*************************** | 2000 : 6363 |******************* | 2500 : 4443 |************* | 3000 : 3240 |********* | 3500 : 2549 |******* | 4000 : 1872 |***** | 4500 : 1504 |**** | 5000 : 1247 |*** | 5500 : 1035 |*** | 6000 : 889 |** | 6500 : 744 |** | 7000 : 634 |* | 7500 : 498 |* | 8000 : 433 |* | 8500 : 355 |* | 9000 : 336 |* | 9500 : 284 | | 10000 : 243 | | Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 89049cc6c066..a7ef624ed726 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -20,7 +20,7 @@ struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; -extern spinlock_t unix_table_lock; +extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { -- cgit v1.2.3 From a6914afcdf0e3fb853fce0e0c04710be7427b62f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Nov 2021 12:31:28 +0200 Subject: kobject: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211110103128.59888-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index efd56f990a46..c740062b4b1a 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -19,10 +19,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include -- cgit v1.2.3 From 6a2d2ddf2c345e0149bfbffdddc4768a9ab0a741 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 12 Nov 2021 14:32:27 +0100 Subject: drm: Move nomodeset kernel parameter to the DRM subsystem The "nomodeset" kernel cmdline parameter is handled by the vgacon driver but the exported vgacon_text_force() symbol is only used by DRM drivers. It makes much more sense for the parameter logic to be in the subsystem of the drivers that are making use of it. Let's move the vgacon_text_force() function and related logic to the DRM subsystem. While doing that, rename it to drm_firmware_drivers_only() and make it return true if "nomodeset" was used and false otherwise. This is a better description of the condition that the drivers are testing for. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Acked-by: Thomas Zimmermann Acked-by: Jani Nikula Acked-by: Pekka Paalanen Acked-by: Greg Kroah-Hartman Link: https://patchwork.freedesktop.org/patch/msgid/20211112133230.1595307-4-javierm@redhat.com --- include/drm/drm_drv.h | 5 +++++ include/linux/console.h | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index a84eb4028e5b..89e26a732175 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -601,5 +601,10 @@ static inline bool drm_drv_uses_atomic_modeset(struct drm_device *dev) int drm_dev_set_unique(struct drm_device *dev, const char *name); +#ifdef CONFIG_VGA_CONSOLE +extern bool drm_firmware_drivers_only(void); +#else +static inline bool drm_firmware_drivers_only(void) { return false; } +#endif #endif diff --git a/include/linux/console.h b/include/linux/console.h index a97f277cfdfa..7cd758a4f44e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -219,12 +219,6 @@ extern atomic_t ignore_console_lock_warning; #define VESA_HSYNC_SUSPEND 2 #define VESA_POWERDOWN 3 -#ifdef CONFIG_VGA_CONSOLE -extern bool vgacon_text_force(void); -#else -static inline bool vgacon_text_force(void) { return false; } -#endif - extern void console_init(void); /* For deferred console takeover */ -- cgit v1.2.3 From e9aeeba26a8de1f553305722d017022ae7e79280 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 12 Nov 2021 14:32:28 +0100 Subject: drm: Decouple nomodeset from CONFIG_VGA_CONSOLE This relationship was only for historical reasons and the nomodeset option should be available even on platforms that don't enable CONFIG_VGA_CONSOLE. Suggested-by: Thomas Zimmermann Signed-off-by: Javier Martinez Canillas Acked-by: Thomas Zimmermann Acked-by: Jani Nikula Acked-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/msgid/20211112133230.1595307-5-javierm@redhat.com --- include/drm/drm_drv.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 89e26a732175..da0c836fe8e1 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -601,10 +601,6 @@ static inline bool drm_drv_uses_atomic_modeset(struct drm_device *dev) int drm_dev_set_unique(struct drm_device *dev, const char *name); -#ifdef CONFIG_VGA_CONSOLE extern bool drm_firmware_drivers_only(void); -#else -static inline bool drm_firmware_drivers_only(void) { return false; } -#endif #endif -- cgit v1.2.3 From ed14e769f64311769dcf20dde544b82c158d01b1 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 15 Nov 2021 14:19:12 +0000 Subject: iio: buffer-dma: Remove unused iio_buffer_block struct This structure was never used anywhere, so it can safely be dropped. It will later be re-introduced as a different structure in a different header. Signed-off-by: Paul Cercueil Reviewed-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20211115141925.60164-3-paul@crapouillou.net Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer-dma.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index ff15c61bf319..6564bdcdac66 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -17,11 +17,6 @@ struct iio_dma_buffer_queue; struct iio_dma_buffer_ops; struct device; -struct iio_buffer_block { - u32 size; - u32 bytes_used; -}; - /** * enum iio_block_state - State of a struct iio_dma_buffer_block * @IIO_BLOCK_STATE_DEQUEUED: Block is not queued -- cgit v1.2.3 From ffc7c5172a6d1f7ec468066a7172ce65baf1e3e1 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 19 Nov 2021 10:56:27 +0200 Subject: iio: expose shared parameter in IIO_ENUM_AVAILABLE The shared parameter should be configurable based on its usage, and not constrained to IIO_SHARED_BY_TYPE. This patch aims to improve the flexibility in using the IIO_ENUM_AVAILABLE define and avoid redefining custom iio enums that expose the shared parameter. An example is the ad5766.c driver where IIO_ENUM_AVAILABLE_SHARED was defined in order to achieve `shared` parameter customization. The current state of the IIO_ENUM_AVAILABLE implementation will imply similar redefinitions each time a driver will require access to the `shared` parameter. An example would be admv1013 driver which will require custom device attribute for the frequency translation modes: Quadrature I/Q mode and Intermediate Frequency mode. Signed-off-by: Antoniu Miclaus Reviewed-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20211119085627.6348-1-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 324561b7a5e8..07025d6b3de1 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -103,15 +103,16 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev, /** * IIO_ENUM_AVAILABLE() - Initialize enum available extended channel attribute * @_name: Attribute name ("_available" will be appended to the name) + * @_shared: Whether the attribute is shared between all channels * @_e: Pointer to an iio_enum struct * * Creates a read only attribute which lists all the available enum items in a * space separated list. This should usually be used together with IIO_ENUM() */ -#define IIO_ENUM_AVAILABLE(_name, _e) \ +#define IIO_ENUM_AVAILABLE(_name, _shared, _e) \ { \ .name = (_name "_available"), \ - .shared = IIO_SHARED_BY_TYPE, \ + .shared = _shared, \ .read = iio_enum_available_read, \ .private = (uintptr_t)(_e), \ } -- cgit v1.2.3 From 447a39f4e89d992f82f03521d46746f6a4348578 Mon Sep 17 00:00:00 2001 From: Sankeerth Billakanti Date: Tue, 2 Nov 2021 13:18:43 +0530 Subject: drm/dp: Add macro to check max_downspread capability Add a macro to check for the max_downspread capability in drm_dp_helper. Signed-off-by: Sankeerth Billakanti Reviewed-by: Stephen Boyd changes in v4: - Return 1 for DPCD version >= v1.1 (Stephen Boyd) Link: https://lore.kernel.org/r/1635839325-401-4-git-send-email-quic_sbillaka@quicinc.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Rob Clark --- include/drm/drm_dp_helper.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index b52df4db3e8f..596635ceb9b2 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -1766,6 +1766,13 @@ drm_dp_tps3_supported(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) dpcd[DP_MAX_LANE_COUNT] & DP_TPS3_SUPPORTED; } +static inline bool +drm_dp_max_downspread(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +{ + return dpcd[DP_DPCD_REV] >= 0x11 || + dpcd[DP_MAX_DOWNSPREAD] & DP_MAX_DOWNSPREAD_0_5; +} + static inline bool drm_dp_tps4_supported(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { -- cgit v1.2.3 From 75c5bd68b699bbcb6d25879644d62de4da14ab92 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Fri, 26 Nov 2021 10:48:19 +0100 Subject: ieee80211: change HE nominal packet padding value defines It's easier to use and understand, and to extend for EHT later, if we use the values here instead of the shifted values. Unfortunately, we need to add _POS so that we can use it in places like iwlwifi/mvm where constants are needed. While at it, fix the typo ("NOMIMAL") which also helps catch any conflicts. Signed-off-by: Miri Korenblit Link: https://lore.kernel.org/r/20211126104817.7c29a05b8eb5.I2ca9faf06e177e3035bec91e2ae53c2f91d41774@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 11d7af260f20..559b6c644938 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2258,11 +2258,12 @@ enum ieee80211_client_reg_power { #define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU 0x08 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB 0x10 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB 0x20 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US 0x00 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US 0x40 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US 0x80 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED 0xc0 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK 0xc0 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_0US 0x0 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_8US 0x1 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US 0x2 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_RESERVED 0x3 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_POS 6 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK 0xc0 #define IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF 0x01 -- cgit v1.2.3 From 4ba0b2c294fe691921271372f7b59e5cc2ce4b0f Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Thu, 18 Nov 2021 17:55:51 -0800 Subject: fpga: mgr: Use standard dev_release for class driver The FPGA manager class driver data structure is being treated as a managed resource instead of using the standard dev_release call-back function to release the class data structure. This change removes the managed resource code for the freeing of the class data structure and combines the create() and register() functions into a single register() or register_full() function. The register_full() function accepts an info data structure to provide flexibility in passing optional parameters. The register() function supports the current parameter list for users that don't require the use of optional parameters. The devm_fpga_mgr_register() function is retained, and the devm_fpga_mgr_register_full() function is added. Signed-off-by: Russ Weight Reviewed-by: Xu Yilun Acked-by: Xu Yilun Signed-off-by: Moritz Fischer --- include/linux/fpga/fpga-mgr.h | 62 ++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 474c1f506307..0f9468771bb9 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -105,6 +105,36 @@ struct fpga_image_info { #endif }; +/** + * struct fpga_compat_id - id for compatibility check + * + * @id_h: high 64bit of the compat_id + * @id_l: low 64bit of the compat_id + */ +struct fpga_compat_id { + u64 id_h; + u64 id_l; +}; + +/** + * struct fpga_manager_info - collection of parameters for an FPGA Manager + * @name: fpga manager name + * @compat_id: FPGA manager id for compatibility check. + * @mops: pointer to structure of fpga manager ops + * @priv: fpga manager private data + * + * fpga_manager_info contains parameters for the register_full function. + * These are separated into an info structure because they some are optional + * others could be added to in the future. The info structure facilitates + * maintaining a stable API. + */ +struct fpga_manager_info { + const char *name; + struct fpga_compat_id *compat_id; + const struct fpga_manager_ops *mops; + void *priv; +}; + /** * struct fpga_manager_ops - ops for low level fpga manager drivers * @initial_header_size: Maximum number of bytes that should be passed into write_init @@ -143,17 +173,6 @@ struct fpga_manager_ops { #define FPGA_MGR_STATUS_IP_PROTOCOL_ERR BIT(3) #define FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR BIT(4) -/** - * struct fpga_compat_id - id for compatibility check - * - * @id_h: high 64bit of the compat_id - * @id_l: low 64bit of the compat_id - */ -struct fpga_compat_id { - u64 id_h; - u64 id_l; -}; - /** * struct fpga_manager - fpga manager structure * @name: name of low level fpga manager @@ -191,17 +210,18 @@ struct fpga_manager *fpga_mgr_get(struct device *dev); void fpga_mgr_put(struct fpga_manager *mgr); -struct fpga_manager *fpga_mgr_create(struct device *dev, const char *name, - const struct fpga_manager_ops *mops, - void *priv); -void fpga_mgr_free(struct fpga_manager *mgr); -int fpga_mgr_register(struct fpga_manager *mgr); -void fpga_mgr_unregister(struct fpga_manager *mgr); +struct fpga_manager * +fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info); -int devm_fpga_mgr_register(struct device *dev, struct fpga_manager *mgr); +struct fpga_manager * +fpga_mgr_register(struct device *parent, const char *name, + const struct fpga_manager_ops *mops, void *priv); +void fpga_mgr_unregister(struct fpga_manager *mgr); -struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name, - const struct fpga_manager_ops *mops, - void *priv); +struct fpga_manager * +devm_fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info); +struct fpga_manager * +devm_fpga_mgr_register(struct device *parent, const char *name, + const struct fpga_manager_ops *mops, void *priv); #endif /*_LINUX_FPGA_MGR_H */ -- cgit v1.2.3 From 0d70af3c2530a70f1b2c197feaa63fbd3548ce34 Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Thu, 18 Nov 2021 17:55:52 -0800 Subject: fpga: bridge: Use standard dev_release for class driver The FPGA bridge class driver data structure is being treated as a managed resource instead of using the standard dev_release call-back function to release the class data structure. This change removes the managed resource code and combines the create() and register() functions into a single register() function. Signed-off-by: Russ Weight Reviewed-by: Xu Yilun Acked-by: Xu Yilun Signed-off-by: Moritz Fischer --- include/linux/fpga/fpga-bridge.h | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h index 6c3c28806ff1..223da48a6d18 100644 --- a/include/linux/fpga/fpga-bridge.h +++ b/include/linux/fpga/fpga-bridge.h @@ -22,6 +22,23 @@ struct fpga_bridge_ops { const struct attribute_group **groups; }; +/** + * struct fpga_bridge_info - collection of parameters an FPGA Bridge + * @name: fpga bridge name + * @br_ops: pointer to structure of fpga bridge ops + * @priv: fpga bridge private data + * + * fpga_bridge_info contains parameters for the register function. These + * are separated into an info structure because they some are optional + * others could be added to in the future. The info structure facilitates + * maintaining a stable API. + */ +struct fpga_bridge_info { + const char *name; + const struct fpga_bridge_ops *br_ops; + void *priv; +}; + /** * struct fpga_bridge - FPGA bridge structure * @name: name of low level FPGA bridge @@ -62,15 +79,10 @@ int of_fpga_bridge_get_to_list(struct device_node *np, struct fpga_image_info *info, struct list_head *bridge_list); -struct fpga_bridge *fpga_bridge_create(struct device *dev, const char *name, - const struct fpga_bridge_ops *br_ops, - void *priv); -void fpga_bridge_free(struct fpga_bridge *br); -int fpga_bridge_register(struct fpga_bridge *br); +struct fpga_bridge * +fpga_bridge_register(struct device *parent, const char *name, + const struct fpga_bridge_ops *br_ops, + void *priv); void fpga_bridge_unregister(struct fpga_bridge *br); -struct fpga_bridge -*devm_fpga_bridge_create(struct device *dev, const char *name, - const struct fpga_bridge_ops *br_ops, void *priv); - #endif /* _LINUX_FPGA_BRIDGE_H */ -- cgit v1.2.3 From 8886a579744fbfa53e69aa453ed10ae3b1f9abac Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Thu, 18 Nov 2021 17:55:53 -0800 Subject: fpga: region: Use standard dev_release for class driver The FPGA region class driver data structure is being treated as a managed resource instead of using the standard dev_release call-back function to release the class data structure. This change removes the managed resource code and combines the create() and register() functions into a single register() or register_full() function. The register_full() function accepts an info data structure to provide flexibility in passing optional parameters. The register() function supports the current parameter list for users that don't require the use of optional parameters. Signed-off-by: Russ Weight Reviewed-by: Xu Yilun Acked-by: Xu Yilun Signed-off-by: Moritz Fischer --- include/linux/fpga/fpga-region.h | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h index 27cb706275db..3b87f232425c 100644 --- a/include/linux/fpga/fpga-region.h +++ b/include/linux/fpga/fpga-region.h @@ -7,6 +7,27 @@ #include #include +struct fpga_region; + +/** + * struct fpga_region_info - collection of parameters an FPGA Region + * @mgr: fpga region manager + * @compat_id: FPGA region id for compatibility check. + * @priv: fpga region private data + * @get_bridges: optional function to get bridges to a list + * + * fpga_region_info contains parameters for the register_full function. + * These are separated into an info structure because they some are optional + * others could be added to in the future. The info structure facilitates + * maintaining a stable API. + */ +struct fpga_region_info { + struct fpga_manager *mgr; + struct fpga_compat_id *compat_id; + void *priv; + int (*get_bridges)(struct fpga_region *region); +}; + /** * struct fpga_region - FPGA Region structure * @dev: FPGA Region device @@ -37,15 +58,12 @@ struct fpga_region *fpga_region_class_find( int fpga_region_program_fpga(struct fpga_region *region); -struct fpga_region -*fpga_region_create(struct device *dev, struct fpga_manager *mgr, - int (*get_bridges)(struct fpga_region *)); -void fpga_region_free(struct fpga_region *region); -int fpga_region_register(struct fpga_region *region); -void fpga_region_unregister(struct fpga_region *region); +struct fpga_region * +fpga_region_register_full(struct device *parent, const struct fpga_region_info *info); -struct fpga_region -*devm_fpga_region_create(struct device *dev, struct fpga_manager *mgr, - int (*get_bridges)(struct fpga_region *)); +struct fpga_region * +fpga_region_register(struct device *parent, struct fpga_manager *mgr, + int (*get_bridges)(struct fpga_region *)); +void fpga_region_unregister(struct fpga_region *region); #endif /* _FPGA_REGION_H */ -- cgit v1.2.3 From 7e78781df491e4beb475bac22e6c44236a5002d7 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Mon, 22 Nov 2021 15:22:09 -0800 Subject: drm/virtgpu api: define a dummy fence signaled event The current virtgpu implementation of poll(..) drops events when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is enabled (otherwise it's like a normal DRM driver). This is because paravirtualized userspaces receives responses in a buffer of type BLOB_MEM_GUEST, not by read(..). To be in line with other DRM drivers and avoid specialized behavior, it is possible to define a dummy event for virtgpu. Paravirtualized userspace will now have to call read(..) on the DRM fd to receive the dummy event. Fixes: b10790434cf2 ("drm/virtgpu api: create context init feature") Reported-by: Daniel Vetter Signed-off-by: Gurchetan Singh Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20211122232210.602-2-gurchetansingh@google.com Signed-off-by: Gerd Hoffmann --- include/uapi/drm/virtgpu_drm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index a13e20cc66b4..0512fde5e697 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -196,6 +196,13 @@ struct drm_virtgpu_context_init { __u64 ctx_set_params; }; +/* + * Event code that's given when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is in + * effect. The event size is sizeof(drm_event), since there is no additional + * payload. + */ +#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000 + #define DRM_IOCTL_VIRTGPU_MAP \ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) -- cgit v1.2.3 From 306456c21c792ac633e660bc45f0854b612a0e98 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 16 Nov 2021 14:54:35 +0200 Subject: mfd: bd70528: Drop BD70528 support The only known BD70528 use-cases are such that the PMIC is controlled from separate MCU which is not running Linux. I am not aware of any Linux driver users. Furthermore, it seems there is no demand for this IC. Let's ease the maintenance burden and drop the driver. We can always add it back if there is sudden need for it. Signed-off-by: Matti Vaittinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/cf7dfd98b3403ad363b2b48b57bdbfd57a6416cb.1637066805.git.matti.vaittinen@fi.rohmeurope.com --- include/linux/mfd/rohm-bd70528.h | 389 --------------------------------------- include/linux/mfd/rohm-generic.h | 1 - 2 files changed, 390 deletions(-) delete mode 100644 include/linux/mfd/rohm-bd70528.h (limited to 'include') diff --git a/include/linux/mfd/rohm-bd70528.h b/include/linux/mfd/rohm-bd70528.h deleted file mode 100644 index 4a5966475a35..000000000000 --- a/include/linux/mfd/rohm-bd70528.h +++ /dev/null @@ -1,389 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* Copyright (C) 2018 ROHM Semiconductors */ - -#ifndef __LINUX_MFD_BD70528_H__ -#define __LINUX_MFD_BD70528_H__ - -#include -#include -#include -#include -#include - -enum { - BD70528_BUCK1, - BD70528_BUCK2, - BD70528_BUCK3, - BD70528_LDO1, - BD70528_LDO2, - BD70528_LDO3, - BD70528_LED1, - BD70528_LED2, -}; - -struct bd70528_data { - struct rohm_regmap_dev chip; - struct mutex rtc_timer_lock; -}; - -#define BD70528_BUCK_VOLTS 0x10 -#define BD70528_LDO_VOLTS 0x20 - -#define BD70528_REG_BUCK1_EN 0x0F -#define BD70528_REG_BUCK1_VOLT 0x15 -#define BD70528_REG_BUCK2_EN 0x10 -#define BD70528_REG_BUCK2_VOLT 0x16 -#define BD70528_REG_BUCK3_EN 0x11 -#define BD70528_REG_BUCK3_VOLT 0x17 -#define BD70528_REG_LDO1_EN 0x1b -#define BD70528_REG_LDO1_VOLT 0x1e -#define BD70528_REG_LDO2_EN 0x1c -#define BD70528_REG_LDO2_VOLT 0x1f -#define BD70528_REG_LDO3_EN 0x1d -#define BD70528_REG_LDO3_VOLT 0x20 -#define BD70528_REG_LED_CTRL 0x2b -#define BD70528_REG_LED_VOLT 0x29 -#define BD70528_REG_LED_EN 0x2a - -/* main irq registers */ -#define BD70528_REG_INT_MAIN 0x7E -#define BD70528_REG_INT_MAIN_MASK 0x74 - -/* 'sub irq' registers */ -#define BD70528_REG_INT_SHDN 0x7F -#define BD70528_REG_INT_PWR_FLT 0x80 -#define BD70528_REG_INT_VR_FLT 0x81 -#define BD70528_REG_INT_MISC 0x82 -#define BD70528_REG_INT_BAT1 0x83 -#define BD70528_REG_INT_BAT2 0x84 -#define BD70528_REG_INT_RTC 0x85 -#define BD70528_REG_INT_GPIO 0x86 -#define BD70528_REG_INT_OP_FAIL 0x87 - -#define BD70528_REG_INT_SHDN_MASK 0x75 -#define BD70528_REG_INT_PWR_FLT_MASK 0x76 -#define BD70528_REG_INT_VR_FLT_MASK 0x77 -#define BD70528_REG_INT_MISC_MASK 0x78 -#define BD70528_REG_INT_BAT1_MASK 0x79 -#define BD70528_REG_INT_BAT2_MASK 0x7a -#define BD70528_REG_INT_RTC_MASK 0x7b -#define BD70528_REG_INT_GPIO_MASK 0x7c -#define BD70528_REG_INT_OP_FAIL_MASK 0x7d - -/* Reset related 'magic' registers */ -#define BD70528_REG_SHIPMODE 0x03 -#define BD70528_REG_HWRESET 0x04 -#define BD70528_REG_WARMRESET 0x05 -#define BD70528_REG_STANDBY 0x06 - -/* GPIO registers */ -#define BD70528_REG_GPIO_STATE 0x8F - -#define BD70528_REG_GPIO1_IN 0x4d -#define BD70528_REG_GPIO2_IN 0x4f -#define BD70528_REG_GPIO3_IN 0x51 -#define BD70528_REG_GPIO4_IN 0x53 -#define BD70528_REG_GPIO1_OUT 0x4e -#define BD70528_REG_GPIO2_OUT 0x50 -#define BD70528_REG_GPIO3_OUT 0x52 -#define BD70528_REG_GPIO4_OUT 0x54 - -/* RTC */ - -#define BD70528_REG_RTC_COUNT_H 0x2d -#define BD70528_REG_RTC_COUNT_L 0x2e -#define BD70528_REG_RTC_SEC 0x2f -#define BD70528_REG_RTC_MINUTE 0x30 -#define BD70528_REG_RTC_HOUR 0x31 -#define BD70528_REG_RTC_WEEK 0x32 -#define BD70528_REG_RTC_DAY 0x33 -#define BD70528_REG_RTC_MONTH 0x34 -#define BD70528_REG_RTC_YEAR 0x35 - -#define BD70528_REG_RTC_ALM_SEC 0x36 -#define BD70528_REG_RTC_ALM_START BD70528_REG_RTC_ALM_SEC -#define BD70528_REG_RTC_ALM_MINUTE 0x37 -#define BD70528_REG_RTC_ALM_HOUR 0x38 -#define BD70528_REG_RTC_ALM_WEEK 0x39 -#define BD70528_REG_RTC_ALM_DAY 0x3a -#define BD70528_REG_RTC_ALM_MONTH 0x3b -#define BD70528_REG_RTC_ALM_YEAR 0x3c -#define BD70528_REG_RTC_ALM_MASK 0x3d -#define BD70528_REG_RTC_ALM_REPEAT 0x3e -#define BD70528_REG_RTC_START BD70528_REG_RTC_SEC - -#define BD70528_REG_RTC_WAKE_SEC 0x43 -#define BD70528_REG_RTC_WAKE_START BD70528_REG_RTC_WAKE_SEC -#define BD70528_REG_RTC_WAKE_MIN 0x44 -#define BD70528_REG_RTC_WAKE_HOUR 0x45 -#define BD70528_REG_RTC_WAKE_CTRL 0x46 - -#define BD70528_REG_ELAPSED_TIMER_EN 0x42 -#define BD70528_REG_WAKE_EN 0x46 - -/* WDT registers */ -#define BD70528_REG_WDT_CTRL 0x4A -#define BD70528_REG_WDT_HOUR 0x49 -#define BD70528_REG_WDT_MINUTE 0x48 -#define BD70528_REG_WDT_SEC 0x47 - -/* Charger / Battery */ -#define BD70528_REG_CHG_CURR_STAT 0x59 -#define BD70528_REG_CHG_BAT_STAT 0x57 -#define BD70528_REG_CHG_BAT_TEMP 0x58 -#define BD70528_REG_CHG_IN_STAT 0x56 -#define BD70528_REG_CHG_DCIN_ILIM 0x5d -#define BD70528_REG_CHG_CHG_CURR_WARM 0x61 -#define BD70528_REG_CHG_CHG_CURR_COLD 0x62 - -/* Masks for main IRQ register bits */ -enum { - BD70528_INT_SHDN, -#define BD70528_INT_SHDN_MASK BIT(BD70528_INT_SHDN) - BD70528_INT_PWR_FLT, -#define BD70528_INT_PWR_FLT_MASK BIT(BD70528_INT_PWR_FLT) - BD70528_INT_VR_FLT, -#define BD70528_INT_VR_FLT_MASK BIT(BD70528_INT_VR_FLT) - BD70528_INT_MISC, -#define BD70528_INT_MISC_MASK BIT(BD70528_INT_MISC) - BD70528_INT_BAT1, -#define BD70528_INT_BAT1_MASK BIT(BD70528_INT_BAT1) - BD70528_INT_RTC, -#define BD70528_INT_RTC_MASK BIT(BD70528_INT_RTC) - BD70528_INT_GPIO, -#define BD70528_INT_GPIO_MASK BIT(BD70528_INT_GPIO) - BD70528_INT_OP_FAIL, -#define BD70528_INT_OP_FAIL_MASK BIT(BD70528_INT_OP_FAIL) -}; - -/* IRQs */ -enum { - /* Shutdown register IRQs */ - BD70528_INT_LONGPUSH, - BD70528_INT_WDT, - BD70528_INT_HWRESET, - BD70528_INT_RSTB_FAULT, - BD70528_INT_VBAT_UVLO, - BD70528_INT_TSD, - BD70528_INT_RSTIN, - /* Power failure register IRQs */ - BD70528_INT_BUCK1_FAULT, - BD70528_INT_BUCK2_FAULT, - BD70528_INT_BUCK3_FAULT, - BD70528_INT_LDO1_FAULT, - BD70528_INT_LDO2_FAULT, - BD70528_INT_LDO3_FAULT, - BD70528_INT_LED1_FAULT, - BD70528_INT_LED2_FAULT, - /* VR FAULT register IRQs */ - BD70528_INT_BUCK1_OCP, - BD70528_INT_BUCK2_OCP, - BD70528_INT_BUCK3_OCP, - BD70528_INT_LED1_OCP, - BD70528_INT_LED2_OCP, - BD70528_INT_BUCK1_FULLON, - BD70528_INT_BUCK2_FULLON, - /* PMU register interrupts */ - BD70528_INT_SHORTPUSH, - BD70528_INT_AUTO_WAKEUP, - BD70528_INT_STATE_CHANGE, - /* Charger 1 register IRQs */ - BD70528_INT_BAT_OV_RES, - BD70528_INT_BAT_OV_DET, - BD70528_INT_DBAT_DET, - BD70528_INT_BATTSD_COLD_RES, - BD70528_INT_BATTSD_COLD_DET, - BD70528_INT_BATTSD_HOT_RES, - BD70528_INT_BATTSD_HOT_DET, - BD70528_INT_CHG_TSD, - /* Charger 2 register IRQs */ - BD70528_INT_BAT_RMV, - BD70528_INT_BAT_DET, - BD70528_INT_DCIN2_OV_RES, - BD70528_INT_DCIN2_OV_DET, - BD70528_INT_DCIN2_RMV, - BD70528_INT_DCIN2_DET, - BD70528_INT_DCIN1_RMV, - BD70528_INT_DCIN1_DET, - /* RTC register IRQs */ - BD70528_INT_RTC_ALARM, - BD70528_INT_ELPS_TIM, - /* GPIO register IRQs */ - BD70528_INT_GPIO0, - BD70528_INT_GPIO1, - BD70528_INT_GPIO2, - BD70528_INT_GPIO3, - /* Invalid operation register IRQs */ - BD70528_INT_BUCK1_DVS_OPFAIL, - BD70528_INT_BUCK2_DVS_OPFAIL, - BD70528_INT_BUCK3_DVS_OPFAIL, - BD70528_INT_LED1_VOLT_OPFAIL, - BD70528_INT_LED2_VOLT_OPFAIL, -}; - -/* Masks */ -#define BD70528_INT_LONGPUSH_MASK 0x1 -#define BD70528_INT_WDT_MASK 0x2 -#define BD70528_INT_HWRESET_MASK 0x4 -#define BD70528_INT_RSTB_FAULT_MASK 0x8 -#define BD70528_INT_VBAT_UVLO_MASK 0x10 -#define BD70528_INT_TSD_MASK 0x20 -#define BD70528_INT_RSTIN_MASK 0x40 - -#define BD70528_INT_BUCK1_FAULT_MASK 0x1 -#define BD70528_INT_BUCK2_FAULT_MASK 0x2 -#define BD70528_INT_BUCK3_FAULT_MASK 0x4 -#define BD70528_INT_LDO1_FAULT_MASK 0x8 -#define BD70528_INT_LDO2_FAULT_MASK 0x10 -#define BD70528_INT_LDO3_FAULT_MASK 0x20 -#define BD70528_INT_LED1_FAULT_MASK 0x40 -#define BD70528_INT_LED2_FAULT_MASK 0x80 - -#define BD70528_INT_BUCK1_OCP_MASK 0x1 -#define BD70528_INT_BUCK2_OCP_MASK 0x2 -#define BD70528_INT_BUCK3_OCP_MASK 0x4 -#define BD70528_INT_LED1_OCP_MASK 0x8 -#define BD70528_INT_LED2_OCP_MASK 0x10 -#define BD70528_INT_BUCK1_FULLON_MASK 0x20 -#define BD70528_INT_BUCK2_FULLON_MASK 0x40 - -#define BD70528_INT_SHORTPUSH_MASK 0x1 -#define BD70528_INT_AUTO_WAKEUP_MASK 0x2 -#define BD70528_INT_STATE_CHANGE_MASK 0x10 - -#define BD70528_INT_BAT_OV_RES_MASK 0x1 -#define BD70528_INT_BAT_OV_DET_MASK 0x2 -#define BD70528_INT_DBAT_DET_MASK 0x4 -#define BD70528_INT_BATTSD_COLD_RES_MASK 0x8 -#define BD70528_INT_BATTSD_COLD_DET_MASK 0x10 -#define BD70528_INT_BATTSD_HOT_RES_MASK 0x20 -#define BD70528_INT_BATTSD_HOT_DET_MASK 0x40 -#define BD70528_INT_CHG_TSD_MASK 0x80 - -#define BD70528_INT_BAT_RMV_MASK 0x1 -#define BD70528_INT_BAT_DET_MASK 0x2 -#define BD70528_INT_DCIN2_OV_RES_MASK 0x4 -#define BD70528_INT_DCIN2_OV_DET_MASK 0x8 -#define BD70528_INT_DCIN2_RMV_MASK 0x10 -#define BD70528_INT_DCIN2_DET_MASK 0x20 -#define BD70528_INT_DCIN1_RMV_MASK 0x40 -#define BD70528_INT_DCIN1_DET_MASK 0x80 - -#define BD70528_INT_RTC_ALARM_MASK 0x1 -#define BD70528_INT_ELPS_TIM_MASK 0x2 - -#define BD70528_INT_GPIO0_MASK 0x1 -#define BD70528_INT_GPIO1_MASK 0x2 -#define BD70528_INT_GPIO2_MASK 0x4 -#define BD70528_INT_GPIO3_MASK 0x8 - -#define BD70528_INT_BUCK1_DVS_OPFAIL_MASK 0x1 -#define BD70528_INT_BUCK2_DVS_OPFAIL_MASK 0x2 -#define BD70528_INT_BUCK3_DVS_OPFAIL_MASK 0x4 -#define BD70528_INT_LED1_VOLT_OPFAIL_MASK 0x10 -#define BD70528_INT_LED2_VOLT_OPFAIL_MASK 0x20 - -#define BD70528_DEBOUNCE_MASK 0x3 - -#define BD70528_DEBOUNCE_DISABLE 0 -#define BD70528_DEBOUNCE_15MS 1 -#define BD70528_DEBOUNCE_30MS 2 -#define BD70528_DEBOUNCE_50MS 3 - -#define BD70528_GPIO_DRIVE_MASK 0x2 -#define BD70528_GPIO_PUSH_PULL 0x0 -#define BD70528_GPIO_OPEN_DRAIN 0x2 - -#define BD70528_GPIO_OUT_EN_MASK 0x80 -#define BD70528_GPIO_OUT_ENABLE 0x80 -#define BD70528_GPIO_OUT_DISABLE 0x0 - -#define BD70528_GPIO_OUT_HI 0x1 -#define BD70528_GPIO_OUT_LO 0x0 -#define BD70528_GPIO_OUT_MASK 0x1 - -#define BD70528_GPIO_IN_STATE_BASE 1 - -/* RTC masks to mask out reserved bits */ - -#define BD70528_MASK_ELAPSED_TIMER_EN 0x1 -/* Mask second, min and hour fields - * HW would support ALM irq for over 24h - * (by setting day, month and year too) - * but as we wish to keep this same as for - * wake-up we limit ALM to 24H and only - * unmask sec, min and hour - */ -#define BD70528_MASK_WAKE_EN 0x1 - -/* WDT masks */ -#define BD70528_MASK_WDT_EN 0x1 -#define BD70528_MASK_WDT_HOUR 0x1 -#define BD70528_MASK_WDT_MINUTE 0x7f -#define BD70528_MASK_WDT_SEC 0x7f - -#define BD70528_WDT_STATE_BIT 0x1 -#define BD70528_ELAPSED_STATE_BIT 0x2 -#define BD70528_WAKE_STATE_BIT 0x4 - -/* Charger masks */ -#define BD70528_MASK_CHG_STAT 0x7f -#define BD70528_MASK_CHG_BAT_TIMER 0x20 -#define BD70528_MASK_CHG_BAT_OVERVOLT 0x10 -#define BD70528_MASK_CHG_BAT_DETECT 0x1 -#define BD70528_MASK_CHG_DCIN1_UVLO 0x1 -#define BD70528_MASK_CHG_DCIN_ILIM 0x3f -#define BD70528_MASK_CHG_CHG_CURR 0x1f -#define BD70528_MASK_CHG_TRICKLE_CURR 0x10 - -/* - * Note, external battery register is the lonely rider at - * address 0xc5. See how to stuff that in the regmap - */ -#define BD70528_MAX_REGISTER 0x94 - -/* Buck control masks */ -#define BD70528_MASK_RUN_EN 0x4 -#define BD70528_MASK_STBY_EN 0x2 -#define BD70528_MASK_IDLE_EN 0x1 -#define BD70528_MASK_LED1_EN 0x1 -#define BD70528_MASK_LED2_EN 0x10 - -#define BD70528_MASK_BUCK_VOLT 0xf -#define BD70528_MASK_LDO_VOLT 0x1f -#define BD70528_MASK_LED1_VOLT 0x1 -#define BD70528_MASK_LED2_VOLT 0x10 - -/* Misc irq masks */ -#define BD70528_INT_MASK_SHORT_PUSH 1 -#define BD70528_INT_MASK_AUTO_WAKE 2 -#define BD70528_INT_MASK_POWER_STATE 4 - -#define BD70528_MASK_BUCK_RAMP 0x10 -#define BD70528_SIFT_BUCK_RAMP 4 - -#if IS_ENABLED(CONFIG_BD70528_WATCHDOG) - -int bd70528_wdt_set(struct rohm_regmap_dev *data, int enable, int *old_state); -void bd70528_wdt_lock(struct rohm_regmap_dev *data); -void bd70528_wdt_unlock(struct rohm_regmap_dev *data); - -#else /* CONFIG_BD70528_WATCHDOG */ - -static inline int bd70528_wdt_set(struct rohm_regmap_dev *data, int enable, - int *old_state) -{ - return 0; -} - -static inline void bd70528_wdt_lock(struct rohm_regmap_dev *data) -{ -} - -static inline void bd70528_wdt_unlock(struct rohm_regmap_dev *data) -{ -} - -#endif /* CONFIG_BD70528_WATCHDOG */ - -#endif /* __LINUX_MFD_BD70528_H__ */ diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 35b392a0d73a..8fb763a2265a 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -12,7 +12,6 @@ enum rohm_chip_type { ROHM_CHIP_TYPE_BD9573, ROHM_CHIP_TYPE_BD9574, ROHM_CHIP_TYPE_BD9576, - ROHM_CHIP_TYPE_BD70528, ROHM_CHIP_TYPE_BD71815, ROHM_CHIP_TYPE_BD71828, ROHM_CHIP_TYPE_BD71837, -- cgit v1.2.3 From dacb5d8875cc6cd3a553363b4d6f06760fcbe70c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Nov 2021 19:34:21 +0100 Subject: tcp: fix page frag corruption on page fault Steffen reported a TCP stream corruption for HTTP requests served by the apache web-server using a cifs mount-point and memory mapping the relevant file. The root cause is quite similar to the one addressed by commit 20eb4f29b602 ("net: fix sk_page_frag() recursion from memory reclaim"). Here the nested access to the task page frag is caused by a page fault on the (mmapped) user-space memory buffer coming from the cifs file. The page fault handler performs an smb transaction on a different socket, inside the same process context. Since sk->sk_allaction for such socket does not prevent the usage for the task_frag, the nested allocation modify "under the hood" the page frag in use by the outer sendmsg call, corrupting the stream. The overall relevant stack trace looks like the following: httpd 78268 [001] 3461630.850950: probe:tcp_sendmsg_locked: ffffffff91461d91 tcp_sendmsg_locked+0x1 ffffffff91462b57 tcp_sendmsg+0x27 ffffffff9139814e sock_sendmsg+0x3e ffffffffc06dfe1d smb_send_kvec+0x28 [...] ffffffffc06cfaf8 cifs_readpages+0x213 ffffffff90e83c4b read_pages+0x6b ffffffff90e83f31 __do_page_cache_readahead+0x1c1 ffffffff90e79e98 filemap_fault+0x788 ffffffff90eb0458 __do_fault+0x38 ffffffff90eb5280 do_fault+0x1a0 ffffffff90eb7c84 __handle_mm_fault+0x4d4 ffffffff90eb8093 handle_mm_fault+0xc3 ffffffff90c74f6d __do_page_fault+0x1ed ffffffff90c75277 do_page_fault+0x37 ffffffff9160111e page_fault+0x1e ffffffff9109e7b5 copyin+0x25 ffffffff9109eb40 _copy_from_iter_full+0xe0 ffffffff91462370 tcp_sendmsg_locked+0x5e0 ffffffff91462370 tcp_sendmsg_locked+0x5e0 ffffffff91462b57 tcp_sendmsg+0x27 ffffffff9139815c sock_sendmsg+0x4c ffffffff913981f7 sock_write_iter+0x97 ffffffff90f2cc56 do_iter_readv_writev+0x156 ffffffff90f2dff0 do_iter_write+0x80 ffffffff90f2e1c3 vfs_writev+0xa3 ffffffff90f2e27c do_writev+0x5c ffffffff90c042bb do_syscall_64+0x5b ffffffff916000ad entry_SYSCALL_64_after_hwframe+0x65 The cifs filesystem rightfully sets sk_allocations to GFP_NOFS, we can avoid the nesting using the sk page frag for allocation lacking the __GFP_FS flag. Do not define an additional mm-helper for that, as this is strictly tied to the sk page frag usage. v1 -> v2: - use a stricted sk_page_frag() check instead of reordering the code (Eric) Reported-by: Steffen Froemer Fixes: 5640f7685831 ("net: use a per task frag allocator") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index b32906e1ab55..715cdb4b2b79 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2430,19 +2430,22 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) * @sk: socket * * Use the per task page_frag instead of the per socket one for - * optimization when we know that we're in the normal context and owns + * optimization when we know that we're in process context and own * everything that's associated with %current. * - * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest - * inside other socket operations and end up recursing into sk_page_frag() - * while it's already in use. + * Both direct reclaim and page faults can nest inside other + * socket operations and end up recursing into sk_page_frag() + * while it's already in use: explicitly avoid task page_frag + * usage if the caller is potentially doing any of them. + * This assumes that page fault handlers use the GFP_NOFS flags. * * Return: a per task page_frag if context allows that, * otherwise a per socket one. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if (gfpflags_normal_context(sk->sk_allocation)) + if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) == + (__GFP_DIRECT_RECLAIM | __GFP_FS)) return ¤t->task_frag; return &sk->sk_frag; -- cgit v1.2.3 From 8544f08c816292c2219f28c6eaa69236b978bfb9 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 Nov 2021 16:45:12 +0900 Subject: ASoC: soc-dai: update snd_soc_dai_delay() to snd_soc_pcm_dai_delay() Current soc_pcm_pointer() is manually calculating both CPU-DAI's max delay (= A) and Codec-DAI's max delay (= B). static snd_pcm_uframes_t soc_pcm_pointer(...) { ... ^ for_each_rtd_cpu_dais(rtd, i, cpu_dai) (A) cpu_delay = max(cpu_delay, ...); v delay += cpu_delay; ^ for_each_rtd_codec_dais(rtd, i, codec_dai) (B) codec_delay = max(codec_delay, ...); v delay += codec_delay; runtime->delay = delay; ... } Current soc_pcm_pointer() and the total delay calculating is not readable / difficult to understand. This patch update snd_soc_dai_delay() to snd_soc_pcm_dai_delay(), and calcule both CPU/Codec delay in one function. Link: https://lore.kernel.org/r/87fszl4yrq.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/875yssy25z.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 0dcb361a98bb..5d4dd7c5450b 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -208,8 +208,6 @@ int snd_soc_dai_startup(struct snd_soc_dai *dai, struct snd_pcm_substream *substream); void snd_soc_dai_shutdown(struct snd_soc_dai *dai, struct snd_pcm_substream *substream, int rollback); -snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai, - struct snd_pcm_substream *substream); void snd_soc_dai_suspend(struct snd_soc_dai *dai); void snd_soc_dai_resume(struct snd_soc_dai *dai); int snd_soc_dai_compress_new(struct snd_soc_dai *dai, @@ -238,6 +236,8 @@ int snd_soc_pcm_dai_trigger(struct snd_pcm_substream *substream, int cmd, int rollback); int snd_soc_pcm_dai_bespoke_trigger(struct snd_pcm_substream *substream, int cmd); +void snd_soc_pcm_dai_delay(struct snd_pcm_substream *substream, + snd_pcm_sframes_t *cpu_delay, snd_pcm_sframes_t *codec_delay); int snd_soc_dai_compr_startup(struct snd_soc_dai *dai, struct snd_compr_stream *cstream); -- cgit v1.2.3 From 403f830e7a0be5a9e33c7a9d208574f79887ec57 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 Nov 2021 16:45:18 +0900 Subject: ASoC: soc-component: add snd_soc_pcm_component_delay() Current soc-pcm.c :: soc_pcm_pointer() is assuming that component driver might update runtime->delay silently in snd_soc_pcm_component_pointer() (= A). static snd_pcm_uframes_t soc_pcm_pointer(...) { ... /* clearing the previous total delay */ => runtime->delay = 0; (A) offset = snd_soc_pcm_component_pointer(substream); /* base delay if assigned in pointer callback */ => delay = runtime->delay; ... } 1) The behavior that ".pointer callback secretly updates runtime->delay" is strange and confusable. 2) Current snd_soc_pcm_component_pointer() uses 1st found component's .pointer callback only, thus it is no problem for now. But runtime->delay might be overwrote if it adjusted to multiple components in the future. 3) Component delay is updated at .pointer callback timing (secretly). But some components which doesn't have .pointer callback might want to increase runtime->delay for some reasons. We already have .delay function for DAI, but not have for Component. This patch adds new snd_soc_pcm_component_delay() for it. Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/874k8cy25t.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index a4317144ab62..a52080407b98 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -148,6 +148,8 @@ struct snd_soc_component_driver { struct vm_area_struct *vma); int (*ack)(struct snd_soc_component *component, struct snd_pcm_substream *substream); + snd_pcm_sframes_t (*delay)(struct snd_soc_component *component, + struct snd_pcm_substream *substream); const struct snd_compress_ops *compress_ops; @@ -505,5 +507,7 @@ int snd_soc_pcm_component_pm_runtime_get(struct snd_soc_pcm_runtime *rtd, void snd_soc_pcm_component_pm_runtime_put(struct snd_soc_pcm_runtime *rtd, void *stream, int rollback); int snd_soc_pcm_component_ack(struct snd_pcm_substream *substream); +void snd_soc_pcm_component_delay(struct snd_pcm_substream *substream, + snd_pcm_sframes_t *cpu_delay, snd_pcm_sframes_t *codec_delay); #endif /* __SOC_COMPONENT_H */ -- cgit v1.2.3 From ed618bd80947fa8d9644baf8ac18cb2a02223a5e Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Sat, 27 Nov 2021 17:34:04 +0800 Subject: net: vxlan: add macro definition for number of IANA VXLAN-GPE port Add macro definition for number of IANA VXLAN-GPE port for generic use. Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- include/net/vxlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 08537aa14f7c..5a934bebe630 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -10,6 +10,7 @@ #include #define IANA_VXLAN_UDP_PORT 4789 +#define IANA_VXLAN_GPE_UDP_PORT 4790 /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -- cgit v1.2.3 From a9c8f68ce2c37ced2f7a8667eda71b7753ede398 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 23 Nov 2021 21:27:22 +0200 Subject: spi: pxa2xx: Get rid of unused ->cs_control() Since the last user of the custom ->cs_control() gone, we may get rid of this legacy API completely. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211123192723.44537-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/pxa2xx_spi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index eaab121ee575..42e06bfbc2a4 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -9,9 +9,6 @@ #include -#define PXA2XX_CS_ASSERT (0x01) -#define PXA2XX_CS_DEASSERT (0x02) - struct dma_chan; /* @@ -47,7 +44,6 @@ struct pxa2xx_spi_chip { u32 timeout; u8 enable_loopback; int gpio_cs; - void (*cs_control)(u32 command); }; #if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) -- cgit v1.2.3 From 8393961c53b31078cfc877bc00eb0f67e1474edd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 23 Nov 2021 21:27:23 +0200 Subject: spi: pxa2xx: Get rid of unused enable_loopback member There is no user of the enable_loopback member in the struct pxa2xx_spi_chip. Remote this legacy member completely. The mentioned in the documentation the testing phase can be performed with spidev_test tool. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211123192723.44537-3-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/pxa2xx_spi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 42e06bfbc2a4..ca74dce36706 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -42,7 +42,6 @@ struct pxa2xx_spi_chip { u8 rx_threshold; u8 dma_burst_size; u32 timeout; - u8 enable_loopback; int gpio_cs; }; -- cgit v1.2.3 From b99658452355d316debee11079e8f1c6c1029355 Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Sun, 28 Nov 2021 17:57:37 -0800 Subject: net: dsa: ocelot: felix: utilize shared mscc-miim driver for indirect MDIO access Switch to a shared MDIO access implementation by way of the mdio-mscc-miim driver. Signed-off-by: Colin Foster Tested-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/mdio/mdio-mscc-miim.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/mdio/mdio-mscc-miim.h (limited to 'include') diff --git a/include/linux/mdio/mdio-mscc-miim.h b/include/linux/mdio/mdio-mscc-miim.h new file mode 100644 index 000000000000..5b4ed2c3cbb9 --- /dev/null +++ b/include/linux/mdio/mdio-mscc-miim.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Driver for the MDIO interface of Microsemi network switches. + * + * Author: Colin Foster + * Copyright (C) 2021 Innovative Advantage + */ +#ifndef MDIO_MSCC_MIIM_H +#define MDIO_MSCC_MIIM_H + +#include +#include +#include + +int mscc_miim_setup(struct device *device, struct mii_bus **bus, + const char *name, struct regmap *mii_regmap, + int status_offset); + +#endif -- cgit v1.2.3 From aeeecb889165617a841e939117f9a8095d0e7d80 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 28 Nov 2021 14:01:02 +0800 Subject: net: snmp: add statistics for tcp small queue check Once tcp small queue check failed in tcp_small_queue_check(), the throughput of tcp will be limited, and it's hard to distinguish whether it is out of tcp congestion control. Add statistics of LINUX_MIB_TCPSMALLQUEUEFAILURE for this scene. Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 904909d020e2..e32ec6932e82 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -292,6 +292,7 @@ enum LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ + LINUX_MIB_TCPSMALLQUEUEFAILURE, /* TCPSmallQueueFailure */ __LINUX_MIB_MAX }; -- cgit v1.2.3 From 17247821ae9b40ea6df8d771cfca97d91675be93 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Thu, 25 Nov 2021 23:46:42 +0100 Subject: mfd: ti_am335x_tscadc: Drop the CNTRLREG_TSC_8WIRE macro In TI's reference manual description for the `AFE_Pen_Ctrl' bit-field of the TSC's CTRL register, there is no mention of 8-wire touchscreens. Even commit f0933a60d190 ("mfd: ti_am335x_tscadc: Update logic in CTRL register for 5-wire TS") says that the value of this bit-field must be the same for 4-wire and 8-wire touchscreens. So let's remove the CNTRLREG_TSC_8WIRE macro to avoid misunderstandings. Signed-off-by: Dario Binacchi Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20211125224642.21011-5-dariobin@libero.it --- include/linux/mfd/ti_am335x_tscadc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index ba13e043d910..4063b0614d90 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -103,7 +103,6 @@ #define CNTRLREG_TSC_AFE_CTRL(val) FIELD_PREP(GENMASK(6, 5), (val)) #define CNTRLREG_TSC_4WIRE CNTRLREG_TSC_AFE_CTRL(1) #define CNTRLREG_TSC_5WIRE CNTRLREG_TSC_AFE_CTRL(2) -#define CNTRLREG_TSC_8WIRE CNTRLREG_TSC_AFE_CTRL(3) #define CNTRLREG_TSC_ENB BIT(7) /*Control registers bitfields for MAGADC IP */ -- cgit v1.2.3 From 79478bf9ea9fa48d30836afa796ac13d8a0f320b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Nov 2021 07:13:54 +0100 Subject: block: move blk_rq_err_bytes to scsi blk_rq_err_bytes is only used by the scsi midlayer, so move it there. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20211117061404.331732-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2949d9ac7484..a78d9a0f2a1b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -947,7 +947,6 @@ struct req_iterator { * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request * blk_rq_cur_bytes() : bytes left in the current segment - * blk_rq_err_bytes() : bytes left till the next error boundary * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment * blk_rq_stats_sectors() : sectors of the entire request used for stats @@ -971,8 +970,6 @@ static inline int blk_rq_cur_bytes(const struct request *rq) return bio_iovec(rq->bio).bv_len; } -unsigned int blk_rq_err_bytes(const struct request *rq); - static inline unsigned int blk_rq_sectors(const struct request *rq) { return blk_rq_bytes(rq) >> SECTOR_SHIFT; -- cgit v1.2.3 From 786d4e01c550e8bb7c9f9f23bca0596a2a33483c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Nov 2021 07:13:55 +0100 Subject: block: remove rq_flush_dcache_pages This function is trivial, and flush_dcache_page is always defined, so just open code it in the 2.5 callers. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20211117061404.331732-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a78d9a0f2a1b..308edc2a4925 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -1132,14 +1132,4 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq) } #endif /* CONFIG_BLK_DEV_ZONED */ -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" -#endif -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -void rq_flush_dcache_pages(struct request *rq); -#else -static inline void rq_flush_dcache_pages(struct request *rq) -{ -} -#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */ #endif /* BLK_MQ_H */ -- cgit v1.2.3 From 86416916466514e4ae0b7296d20133b6427c4c1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Nov 2021 14:06:12 +0100 Subject: block: move GENHD_FL_NATIVE_CAPACITY to disk->state The flag to indicate an unlocked native capacity is dynamic state, not a driver capability flag, so move it to disk->state. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211122130625.1136848-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 74c410263113..e490a71e5e9d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -60,12 +60,6 @@ struct partition_meta_info { * (``BLOCK_EXT_MAJOR``). * This affects the maximum number of partitions. * - * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the - * partition table, the device's capacity has been extended to its - * native capacity; i.e. the device has hidden capacity used by one - * of the partitions (this is a flag used so that native capacity is - * only ever unlocked once). - * * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is * blocked whenever a writer holds an exclusive lock. * @@ -86,7 +80,6 @@ struct partition_meta_info { #define GENHD_FL_CD 0x0008 #define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 #define GENHD_FL_EXT_DEVT 0x0040 -#define GENHD_FL_NATIVE_CAPACITY 0x0080 #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100 #define GENHD_FL_NO_PART_SCAN 0x0200 #define GENHD_FL_HIDDEN 0x0400 @@ -140,6 +133,7 @@ struct gendisk { #define GD_NEED_PART_SCAN 0 #define GD_READ_ONLY 1 #define GD_DEAD 2 +#define GD_NATIVE_CAPACITY 3 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ -- cgit v1.2.3 From 1545e0b419ba1d9b9bee4061d4826340afe6b0aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Nov 2021 14:06:13 +0100 Subject: block: move GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE to disk->event_flags GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE is all about the event reporting mechanism, so move it to the event_flags field. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211122130625.1136848-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e490a71e5e9d..c1136ff3c91f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -60,9 +60,6 @@ struct partition_meta_info { * (``BLOCK_EXT_MAJOR``). * This affects the maximum number of partitions. * - * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is - * blocked whenever a writer holds an exclusive lock. - * * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled. * Used for loop devices in their default settings and some MMC * devices. @@ -80,7 +77,6 @@ struct partition_meta_info { #define GENHD_FL_CD 0x0008 #define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 #define GENHD_FL_EXT_DEVT 0x0040 -#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100 #define GENHD_FL_NO_PART_SCAN 0x0200 #define GENHD_FL_HIDDEN 0x0400 @@ -94,6 +90,8 @@ enum { DISK_EVENT_FLAG_POLL = 1 << 0, /* Forward events to udev */ DISK_EVENT_FLAG_UEVENT = 1 << 1, + /* Block event polling when open for exclusive write */ + DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, }; struct disk_events; -- cgit v1.2.3 From 1a827ce1b9f2c740d2c6a228afd972970c18bc21 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Nov 2021 14:06:14 +0100 Subject: block: remove GENHD_FL_CD GENHD_FL_CD marks a gendisk as a vaguely CD-ROM like device. Besides being used internally inside of sunvdc.c an xen-blkfront it is used by xen-blkback as a hint to claim a device exported to a guest is a CD-ROM like device. Just check for disk->cdi instead which is the right indicator for "real" CD-ROM or DVD drivers. This will miss the paravirtualized guest drivers, but those make little sense to report anyway. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211122130625.1136848-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c1136ff3c91f..74518c576fbb 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -46,10 +46,6 @@ struct partition_meta_info { * Must not be set for devices which are removed entirely when the * media is removed. * - * ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style - * device. - * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl. - * * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include * partition information in ``/proc/partitions`` or in the output of * printk_all_partitions(). @@ -74,7 +70,6 @@ struct partition_meta_info { #define GENHD_FL_REMOVABLE 0x0001 /* 2 is unused (used to be GENHD_FL_DRIVERFS) */ /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ -#define GENHD_FL_CD 0x0008 #define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 #define GENHD_FL_EXT_DEVT 0x0040 #define GENHD_FL_NO_PART_SCAN 0x0200 -- cgit v1.2.3 From 46e7eac647b34ed4106a8262f8bedbb90801fadd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Nov 2021 14:06:17 +0100 Subject: block: rename GENHD_FL_NO_PART_SCAN to GENHD_FL_NO_PART The GENHD_FL_NO_PART_SCAN controls more than just partitions canning, so rename it to GENHD_FL_NO_PART. Signed-off-by: Christoph Hellwig Acked-by: Ulf Hansson Link: https://lore.kernel.org/r/20211122130625.1136848-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 74518c576fbb..0b9be3df9489 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -56,15 +56,15 @@ struct partition_meta_info { * (``BLOCK_EXT_MAJOR``). * This affects the maximum number of partitions. * - * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled. - * Used for loop devices in their default settings and some MMC - * devices. + * ``GENHD_FL_NO_PART`` (0x0200): partition support is disabled. + * The kernel will not scan for partitions from add_disk, and users + * can't add partitions manually. * * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it * doesn't produce events, doesn't appear in sysfs, and doesn't have * an associated ``bdev``. * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and - * ``GENHD_FL_NO_PART_SCAN``. + * ``GENHD_FL_NO_PART``. * Used for multipath devices. */ #define GENHD_FL_REMOVABLE 0x0001 @@ -72,7 +72,7 @@ struct partition_meta_info { /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ #define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 #define GENHD_FL_EXT_DEVT 0x0040 -#define GENHD_FL_NO_PART_SCAN 0x0200 +#define GENHD_FL_NO_PART 0x0200 #define GENHD_FL_HIDDEN 0x0400 enum { @@ -180,8 +180,7 @@ static inline int disk_max_parts(struct gendisk *disk) static inline bool disk_part_scan_enabled(struct gendisk *disk) { - return disk_max_parts(disk) > 1 && - !(disk->flags & GENHD_FL_NO_PART_SCAN); + return disk_max_parts(disk) > 1 && !(disk->flags & GENHD_FL_NO_PART); } static inline dev_t disk_devt(struct gendisk *disk) -- cgit v1.2.3 From 3b5149ac50970669ee0ddb9629ec77ffd5c0622d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Nov 2021 14:06:21 +0100 Subject: block: remove GENHD_FL_SUPPRESS_PARTITION_INFO This flag is not set directly anywhere and only inherited from GENHD_FL_HIDDEN. Just check for GENHD_FL_HIDDEN instead. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211122130625.1136848-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0b9be3df9489..64a2f33ae9ea 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -46,11 +46,6 @@ struct partition_meta_info { * Must not be set for devices which are removed entirely when the * media is removed. * - * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include - * partition information in ``/proc/partitions`` or in the output of - * printk_all_partitions(). - * Used for the null block device and some MMC devices. - * * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended * dynamic ``dev_t``, i.e. it wants extended device numbers * (``BLOCK_EXT_MAJOR``). @@ -63,14 +58,12 @@ struct partition_meta_info { * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it * doesn't produce events, doesn't appear in sysfs, and doesn't have * an associated ``bdev``. - * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and - * ``GENHD_FL_NO_PART``. + * Implies ``GENHD_FL_NO_PART``. * Used for multipath devices. */ #define GENHD_FL_REMOVABLE 0x0001 /* 2 is unused (used to be GENHD_FL_DRIVERFS) */ /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ -#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 #define GENHD_FL_EXT_DEVT 0x0040 #define GENHD_FL_NO_PART 0x0200 #define GENHD_FL_HIDDEN 0x0400 -- cgit v1.2.3 From 1ebe2e5f9d68e94c524aba876f27b945669a7879 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Nov 2021 14:06:22 +0100 Subject: block: remove GENHD_FL_EXT_DEVT All modern drivers can support extra partitions using the extended dev_t. In fact except for the ioctl method drivers never even see partitions in normal operation. So remove the GENHD_FL_EXT_DEVT and allow extra partitions for all block devices that do support partitions, and require those that do not support partitions to explicit disallow them using GENHD_FL_NO_PART. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211122130625.1136848-12-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 64a2f33ae9ea..b8ced80178d6 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -46,11 +46,6 @@ struct partition_meta_info { * Must not be set for devices which are removed entirely when the * media is removed. * - * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended - * dynamic ``dev_t``, i.e. it wants extended device numbers - * (``BLOCK_EXT_MAJOR``). - * This affects the maximum number of partitions. - * * ``GENHD_FL_NO_PART`` (0x0200): partition support is disabled. * The kernel will not scan for partitions from add_disk, and users * can't add partitions manually. @@ -64,7 +59,6 @@ struct partition_meta_info { #define GENHD_FL_REMOVABLE 0x0001 /* 2 is unused (used to be GENHD_FL_DRIVERFS) */ /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ -#define GENHD_FL_EXT_DEVT 0x0040 #define GENHD_FL_NO_PART 0x0200 #define GENHD_FL_HIDDEN 0x0400 @@ -94,13 +88,13 @@ struct blk_integrity { }; struct gendisk { - /* major, first_minor and minors are input parameters only, - * don't use directly. Use disk_devt() and disk_max_parts(). + /* + * major/first_minor/minors should not be set by any new driver, the + * block core will take care of allocating them automatically. */ - int major; /* major number of driver */ + int major; int first_minor; - int minors; /* maximum number of minors, =1 for - * disks that can't be partitioned. */ + int minors; char disk_name[DISK_NAME_LEN]; /* name of major driver */ @@ -164,18 +158,6 @@ static inline bool disk_live(struct gendisk *disk) #define disk_to_cdi(disk) NULL #endif -static inline int disk_max_parts(struct gendisk *disk) -{ - if (disk->flags & GENHD_FL_EXT_DEVT) - return DISK_MAX_PARTS; - return disk->minors; -} - -static inline bool disk_part_scan_enabled(struct gendisk *disk) -{ - return disk_max_parts(disk) > 1 && !(disk->flags & GENHD_FL_NO_PART); -} - static inline dev_t disk_devt(struct gendisk *disk) { return MKDEV(disk->major, disk->first_minor); -- cgit v1.2.3 From 430cc5d3ab4d0ba0bd011cfbb0035e46ba92920c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Nov 2021 14:06:24 +0100 Subject: block: cleanup the GENHD_FL_* definitions Switch to an enum and tidy up the documentation. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211122130625.1136848-14-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b8ced80178d6..6906a45bc761 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -39,28 +39,24 @@ struct partition_meta_info { /** * DOC: genhd capability flags * - * ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device - * gives access to removable media. - * When set, the device remains present even when media is not - * inserted. - * Must not be set for devices which are removed entirely when the + * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to + * removable media. When set, the device remains present even when media is not + * inserted. Shall not be set for devices which are removed entirely when the * media is removed. * - * ``GENHD_FL_NO_PART`` (0x0200): partition support is disabled. - * The kernel will not scan for partitions from add_disk, and users - * can't add partitions manually. + * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, + * doesn't appear in sysfs, and can't be opened from userspace or using + * blkdev_get*. Used for the underlying components of multipath devices. + * + * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not + * scan for partitions from add_disk, and users can't add partitions manually. * - * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it - * doesn't produce events, doesn't appear in sysfs, and doesn't have - * an associated ``bdev``. - * Implies ``GENHD_FL_NO_PART``. - * Used for multipath devices. */ -#define GENHD_FL_REMOVABLE 0x0001 -/* 2 is unused (used to be GENHD_FL_DRIVERFS) */ -/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ -#define GENHD_FL_NO_PART 0x0200 -#define GENHD_FL_HIDDEN 0x0400 +enum { + GENHD_FL_REMOVABLE = 1 << 0, + GENHD_FL_HIDDEN = 1 << 1, + GENHD_FL_NO_PART = 1 << 2, +}; enum { DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ -- cgit v1.2.3 From 48b5c1fbcd8c5bc6b91a56399a5257b801391dd8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Nov 2021 14:03:26 -0700 Subject: block: only allocate poll_stats if there's a user of them This is essentially never used, yet it's about 1/3rd of the total queue size. Allocate it when needed, and don't embed it in the queue. Kill the queue flag for this while at it, since we can just check the assigned pointer now. Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bd4370baccca..74118e67f649 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -267,7 +267,7 @@ struct request_queue { int poll_nsec; struct blk_stat_callback *poll_cb; - struct blk_rq_stat poll_stat[BLK_MQ_POLL_STATS_BKTS]; + struct blk_rq_stat *poll_stat; struct timer_list timeout; struct work_struct timeout_work; @@ -397,7 +397,6 @@ struct request_queue { #define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ -#define QUEUE_FLAG_POLL_STATS 21 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ -- cgit v1.2.3 From 72cd9df2ef788d88c138d51223a01ca6281f232d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 23 Nov 2021 17:37:33 -0800 Subject: blk-crypto: remove blk_crypto_unregister() This function is trivial and is only used in one place. Having this function is misleading because it implies that blk_crypto_register() needs to be paired with blk_crypto_unregister(), which is not the case. Just set disk->queue->crypto_profile to NULL directly. Signed-off-by: Eric Biggers Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211124013733.347612-1-ebiggers@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 74118e67f649..0a4416ef4fbf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1170,8 +1170,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo bool blk_crypto_register(struct blk_crypto_profile *profile, struct request_queue *q); -void blk_crypto_unregister(struct request_queue *q); - #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline bool blk_crypto_register(struct blk_crypto_profile *profile, @@ -1180,8 +1178,6 @@ static inline bool blk_crypto_register(struct blk_crypto_profile *profile, return true; } -static inline void blk_crypto_unregister(struct request_queue *q) { } - #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ enum blk_unique_id { -- cgit v1.2.3 From e8dc17e2893b4107366004810ca2a4acf1fc8563 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 25 Oct 2021 09:06:57 +0200 Subject: blk-mq: Add blk_mq_complete_request_direct() Add blk_mq_complete_request_direct() which completes the block request directly instead deferring it to softirq for single queue devices. This is useful for devices which complete the requests in preemptible context and raising softirq from means scheduling ksoftirqd. Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211025070658.1565848-2-bigeasy@linutronix.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 308edc2a4925..d952c3442261 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -752,6 +752,17 @@ static inline void blk_mq_set_request_complete(struct request *rq) WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); } +/* + * Complete the request directly instead of deferring it to softirq or + * completing it another CPU. Useful in preemptible instead of an interrupt. + */ +static inline void blk_mq_complete_request_direct(struct request *rq, + void (*complete)(struct request *rq)) +{ + WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); + complete(rq); +} + void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); -- cgit v1.2.3 From 88c9a2ce520ba381bb70658c80ec704f4d60f728 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Nov 2021 12:58:05 +0100 Subject: fork: move copy_io to block/blk-ioc.c Move the copying of the I/O context to the block layer as that is where we can use the proper low-level interfaces. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211126115817.2087431-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 0a9dc40b7be8..bcd47d104d8e 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -129,14 +129,6 @@ static inline void get_io_context_active(struct io_context *ioc) atomic_inc(&ioc->active_ref); } -static inline void ioc_task_link(struct io_context *ioc) -{ - get_io_context_active(ioc); - - WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0); - atomic_inc(&ioc->nr_tasks); -} - struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); @@ -144,10 +136,21 @@ void put_io_context_active(struct io_context *ioc); void exit_io_context(struct task_struct *task); struct io_context *get_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node); +int __copy_io(unsigned long clone_flags, struct task_struct *tsk); +static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) +{ + if (!current->io_context) + return 0; + return __copy_io(clone_flags, tsk); +} #else struct io_context; static inline void put_io_context(struct io_context *ioc) { } static inline void exit_io_context(struct task_struct *task) { } -#endif +static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) +{ + return 0; +} +#endif /* CONFIG_BLOCK */ -#endif +#endif /* IOCONTEXT_H */ -- cgit v1.2.3 From 3304742562d27fb87a6d8291cc48824dd20f6964 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Nov 2021 12:58:09 +0100 Subject: block: mark put_io_context_active static Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211126115817.2087431-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index bcd47d104d8e..3ba45953d522 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -132,7 +132,6 @@ static inline void get_io_context_active(struct io_context *ioc) struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); -void put_io_context_active(struct io_context *ioc); void exit_io_context(struct task_struct *task); struct io_context *get_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node); -- cgit v1.2.3 From 50569c24be61eafb3efa06e2a3ccd447f75ae1b0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Nov 2021 12:58:12 +0100 Subject: block: remove get_io_context_active Fold it into it's only caller, and remove a lof of the debug checks that are not needed. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211126115817.2087431-10-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 3ba45953d522..c1229fbd6691 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -113,22 +113,6 @@ struct io_context { struct work_struct release_work; }; -/** - * get_io_context_active - get active reference on ioc - * @ioc: ioc of interest - * - * Only iocs with active reference can issue new IOs. This function - * acquires an active reference on @ioc. The caller must already have an - * active reference on @ioc. - */ -static inline void get_io_context_active(struct io_context *ioc) -{ - WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0); - WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0); - atomic_long_inc(&ioc->refcount); - atomic_inc(&ioc->active_ref); -} - struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); -- cgit v1.2.3 From f3fa33acca9f0058157214800f68b10d8e71ab7a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Nov 2021 13:18:00 +0100 Subject: block: remove the ->rq_disk field in struct request Just use the disk attached to the request_queue instead. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20211126121802.2090656-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ---- include/scsi/scsi_cmnd.h | 2 +- include/scsi/scsi_device.h | 4 ++-- include/trace/events/block.h | 8 ++++---- 4 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d952c3442261..ede7bef8880a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -100,7 +100,6 @@ struct request { struct request *rq_next; }; - struct gendisk *rq_disk; struct block_device *part; #ifdef CONFIG_BLK_RQ_ALLOC_TIME /* Time that the first bio started allocating this request. */ @@ -890,9 +889,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; rq->ioprio = bio_prio(bio); - - if (bio->bi_bdev) - rq->rq_disk = bio->bi_bdev->bd_disk; } void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 477a800a9543..6794d7322cbd 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -164,7 +164,7 @@ static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); - return *(struct scsi_driver **)rq->rq_disk->private_data; + return *(struct scsi_driver **)rq->q->disk->private_data; } void scsi_done(struct scsi_cmnd *cmd); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d1c6fc83b1e3..ab7557d84f75 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -275,9 +275,9 @@ scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...); do { \ struct request *__rq = scsi_cmd_to_rq((scmd)); \ \ - if (__rq->rq_disk) \ + if (__rq->q->disk) \ sdev_dbg((scmd)->device, "[%s] " fmt, \ - __rq->rq_disk->disk_name, ##a); \ + __rq->q->disk->disk_name, ##a); \ else \ sdev_dbg((scmd)->device, fmt, ##a); \ } while (0) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index a95daa4d4caa..27170e40e8c9 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -85,7 +85,7 @@ TRACE_EVENT(block_rq_requeue, ), TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); @@ -128,7 +128,7 @@ TRACE_EVENT(block_rq_complete, ), TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; __entry->error = blk_status_to_errno(error); @@ -161,7 +161,7 @@ DECLARE_EVENT_CLASS(block_rq, ), TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->bytes = blk_rq_bytes(rq); @@ -512,7 +512,7 @@ TRACE_EVENT(block_rq_remap, ), TP_fast_assign( - __entry->dev = disk_devt(rq->rq_disk); + __entry->dev = disk_devt(rq->q->disk); __entry->sector = blk_rq_pos(rq); __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; -- cgit v1.2.3 From b84ba30b6c7a75babdf73b83bc3c7b59b944501a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Nov 2021 13:18:01 +0100 Subject: block: remove the gendisk argument to blk_execute_rq Remove the gendisk aregument to blk_execute_rq and blk_execute_rq_nowait given that it is unused now. Also convert the boolean at_head parameter to actually use the bool type while touching the prototype. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20211126121802.2090656-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ede7bef8880a..1b87b7c8bbff 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -924,10 +924,9 @@ int blk_rq_unmap_user(struct bio *); int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); int blk_rq_append_bio(struct request *rq, struct bio *bio); -void blk_execute_rq_nowait(struct gendisk *, struct request *, int, - rq_end_io_fn *); -blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq, - int at_head); +void blk_execute_rq_nowait(struct request *rq, bool at_head, + rq_end_io_fn *end_io); +blk_status_t blk_execute_rq(struct request *rq, bool at_head); struct req_iterator { struct bvec_iter iter; -- cgit v1.2.3 From a30e3441325ba4011ddf125932cda21ca820c0bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Nov 2021 13:18:02 +0100 Subject: scsi: remove the gendisk argument to scsi_ioctl Now that blk_execute_rq does not take a gendisk argument there is no need to pass it through the scsi_ioctl callchain either. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20211126121802.2090656-6-hch@lst.de Signed-off-by: Jens Axboe --- include/scsi/scsi_ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h index d2cb9aeaf1f1..beac64e38b87 100644 --- a/include/scsi/scsi_ioctl.h +++ b/include/scsi/scsi_ioctl.h @@ -45,8 +45,8 @@ typedef struct scsi_fctargaddress { int scsi_ioctl_block_when_processing_errors(struct scsi_device *sdev, int cmd, bool ndelay); -int scsi_ioctl(struct scsi_device *sdev, struct gendisk *disk, fmode_t mode, - int cmd, void __user *arg); +int scsi_ioctl(struct scsi_device *sdev, fmode_t mode, int cmd, + void __user *arg); int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp); int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp); bool scsi_cmd_allowed(unsigned char *cmd, fmode_t mode); -- cgit v1.2.3 From cdef485217d30382f3bf6448c54b4401648fe3f1 Mon Sep 17 00:00:00 2001 From: msizanoen1 Date: Tue, 23 Nov 2021 13:48:32 +0100 Subject: ipv6: fix memory leak in fib6_rule_suppress The kernel leaks memory when a `fib` rule is present in IPv6 nftables firewall rules and a suppress_prefix rule is present in the IPv6 routing rules (used by certain tools such as wg-quick). In such scenarios, every incoming packet will leak an allocation in `ip6_dst_cache` slab cache. After some hours of `bpftrace`-ing and source code reading, I tracked down the issue to ca7a03c41753 ("ipv6: do not free rt if FIB_LOOKUP_NOREF is set on suppress rule"). The problem with that change is that the generic `args->flags` always have `FIB_LOOKUP_NOREF` set[1][2] but the IPv6-specific flag `RT6_LOOKUP_F_DST_NOREF` might not be, leading to `fib6_rule_suppress` not decreasing the refcount when needed. How to reproduce: - Add the following nftables rule to a prerouting chain: meta nfproto ipv6 fib saddr . mark . iif oif missing drop This can be done with: sudo nft create table inet test sudo nft create chain inet test test_chain '{ type filter hook prerouting priority filter + 10; policy accept; }' sudo nft add rule inet test test_chain meta nfproto ipv6 fib saddr . mark . iif oif missing drop - Run: sudo ip -6 rule add table main suppress_prefixlength 0 - Watch `sudo slabtop -o | grep ip6_dst_cache` to see memory usage increase with every incoming ipv6 packet. This patch exposes the protocol-specific flags to the protocol specific `suppress` function, and check the protocol-specific `flags` argument for RT6_LOOKUP_F_DST_NOREF instead of the generic FIB_LOOKUP_NOREF when decreasing the refcount, like this. [1]: https://github.com/torvalds/linux/blob/ca7a03c4175366a92cee0ccc4fec0038c3266e26/net/ipv6/fib6_rules.c#L71 [2]: https://github.com/torvalds/linux/blob/ca7a03c4175366a92cee0ccc4fec0038c3266e26/net/ipv6/fib6_rules.c#L99 Link: https://bugzilla.kernel.org/show_bug.cgi?id=215105 Fixes: ca7a03c41753 ("ipv6: do not free rt if FIB_LOOKUP_NOREF is set on suppress rule") Cc: stable@vger.kernel.org Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- include/net/fib_rules.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 4b10676c69d1..bd07484ab9dd 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -69,7 +69,7 @@ struct fib_rules_ops { int (*action)(struct fib_rule *, struct flowi *, int, struct fib_lookup_arg *); - bool (*suppress)(struct fib_rule *, + bool (*suppress)(struct fib_rule *, int, struct fib_lookup_arg *); int (*match)(struct fib_rule *, struct flowi *, int); @@ -218,7 +218,9 @@ INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg)); #endif -- cgit v1.2.3 From 20ae1d6aa159eb91a9bf09ff92ccaa94dbea92c2 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Nov 2021 10:39:25 -0500 Subject: wireguard: device: reset peer src endpoint when netns exits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each peer's endpoint contains a dst_cache entry that takes a reference to another netdev. When the containing namespace exits, we take down the socket and prevent future sockets from being created (by setting creating_net to NULL), which removes that potential reference on the netns. However, it doesn't release references to the netns that a netdev cached in dst_cache might be taking, so the netns still might fail to exit. Since the socket is gimped anyway, we can simply clear all the dst_caches (by way of clearing the endpoint src), which will release all references. However, the current dst_cache_reset function only releases those references lazily. But it turns out that all of our usages of wg_socket_clear_peer_endpoint_src are called from contexts that are not exactly high-speed or bottle-necked. For example, when there's connection difficulty, or when userspace is reconfiguring the interface. And in particular for this patch, when the netns is exiting. So for those cases, it makes more sense to call dst_release immediately. For that, we add a small helper function to dst_cache. This patch also adds a test to netns.sh from Hangbin Liu to ensure this doesn't regress. Tested-by: Hangbin Liu Reported-by: Xiumei Mu Cc: Toke Høiland-Jørgensen Cc: Paolo Abeni Fixes: 900575aa33a3 ("wireguard: device: avoid circular netns references") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- include/net/dst_cache.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index 67634675e919..df6622a5fe98 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache) dst_cache->reset_ts = jiffies; } +/** + * dst_cache_reset_now - invalidate the cache contents immediately + * @dst_cache: the cache + * + * The caller must be sure there are no concurrent users, as this frees + * all dst_cache users immediately, rather than waiting for the next + * per-cpu usage like dst_cache_reset does. Most callers should use the + * higher speed lazily-freed dst_cache_reset function instead. + */ +void dst_cache_reset_now(struct dst_cache *dst_cache); + /** * dst_cache_init - initialize the cache, allocating the required storage * @dst_cache: the cache -- cgit v1.2.3 From f7e5b9bfa6c8820407b64eabc1f29c9a87e8993d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Nov 2021 10:39:29 -0500 Subject: siphash: use _unaligned version by default On ARM v6 and later, we define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS because the ordinary load/store instructions (ldr, ldrh, ldrb) can tolerate any misalignment of the memory address. However, load/store double and load/store multiple instructions (ldrd, ldm) may still only be used on memory addresses that are 32-bit aligned, and so we have to use the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS macro with care, or we may end up with a severe performance hit due to alignment traps that require fixups by the kernel. Testing shows that this currently happens with clang-13 but not gcc-11. In theory, any compiler version can produce this bug or other problems, as we are dealing with undefined behavior in C99 even on architectures that support this in hardware, see also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363. Fortunately, the get_unaligned() accessors do the right thing: when building for ARMv6 or later, the compiler will emit unaligned accesses using the ordinary load/store instructions (but avoid the ones that require 32-bit alignment). When building for older ARM, those accessors will emit the appropriate sequence of ldrb/mov/orr instructions. And on architectures that can truly tolerate any kind of misalignment, the get_unaligned() accessors resolve to the leXX_to_cpup accessors that operate on aligned addresses. Since the compiler will in fact emit ldrd or ldm instructions when building this code for ARM v6 or later, the solution is to use the unaligned accessors unconditionally on architectures where this is known to be fast. The _aligned version of the hash function is however still needed to get the best performance on architectures that cannot do any unaligned access in hardware. This new version avoids the undefined behavior and should produce the fastest hash on all architectures we support. Link: https://lore.kernel.org/linux-arm-kernel/20181008211554.5355-4-ard.biesheuvel@linaro.org/ Link: https://lore.kernel.org/linux-crypto/CAK8P3a2KfmmGDbVHULWevB0hv71P2oi2ZCHEAqT=8dQfa0=cqQ@mail.gmail.com/ Reported-by: Ard Biesheuvel Fixes: 2c956a60778c ("siphash: add cryptographically secure PRF") Signed-off-by: Arnd Bergmann Reviewed-by: Jason A. Donenfeld Acked-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- include/linux/siphash.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/siphash.h b/include/linux/siphash.h index bf21591a9e5e..0cda61855d90 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key) } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); -#endif u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); @@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); -#endif return ___siphash_aligned(data, len, key); } @@ -96,10 +93,8 @@ typedef struct { u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); -#endif u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); @@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); -#endif return ___hsiphash_aligned(data, len, key); } -- cgit v1.2.3 From b93199b2867646be5b1c84cc0a844df023877806 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 29 Nov 2021 10:48:39 +0100 Subject: drm/ttm: Don't include drm_hashtab.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the include statement for drm_hashtab.h. It's not required by TTM. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Acked-by: Daniel Vetter Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20211129094841.22499-2-tzimmermann@suse.de --- include/drm/ttm/ttm_bo_api.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index cd785cfa3123..c17b2df9178b 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -32,7 +32,6 @@ #define _TTM_BO_API_H_ #include -#include #include #include #include -- cgit v1.2.3 From a21800bced7cbaf7bb8f5281db17a5d7ef6e197a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 29 Nov 2021 10:48:41 +0100 Subject: drm: Declare hashtable as legacy The DRM hashtable code is only used by internal functions for legacy UMS drivers. Move the implementation behind CONFIG_DRM_LEGACY and the declarations into legacy header files. Unexport the symbols. Signed-off-by: Thomas Zimmermann Acked-by: Daniel Vetter Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20211129094841.22499-4-tzimmermann@suse.de --- include/drm/drm_device.h | 5 +-- include/drm/drm_hashtab.h | 79 ----------------------------------------------- include/drm/drm_legacy.h | 15 ++++++++- 3 files changed, 15 insertions(+), 84 deletions(-) delete mode 100644 include/drm/drm_hashtab.h (limited to 'include') diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 604b1d1b2d72..9923c7a6885e 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -6,16 +6,13 @@ #include #include -#include +#include #include struct drm_driver; struct drm_minor; struct drm_master; -struct drm_device_dma; struct drm_vblank_crtc; -struct drm_sg_mem; -struct drm_local_map; struct drm_vma_offset_manager; struct drm_vram_mm; struct drm_fb_helper; diff --git a/include/drm/drm_hashtab.h b/include/drm/drm_hashtab.h deleted file mode 100644 index bb95ff011baf..000000000000 --- a/include/drm/drm_hashtab.h +++ /dev/null @@ -1,79 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismack, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * - **************************************************************************/ -/* - * Simple open hash tab implementation. - * - * Authors: - * Thomas Hellström - */ - -#ifndef DRM_HASHTAB_H -#define DRM_HASHTAB_H - -#include - -#define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member) - -struct drm_hash_item { - struct hlist_node head; - unsigned long key; -}; - -struct drm_open_hash { - struct hlist_head *table; - u8 order; -}; - -int drm_ht_create(struct drm_open_hash *ht, unsigned int order); -int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item); -int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add); -int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, struct drm_hash_item **item); - -void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key); -int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key); -int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item); -void drm_ht_remove(struct drm_open_hash *ht); - -/* - * RCU-safe interface - * - * The user of this API needs to make sure that two or more instances of the - * hash table manipulation functions are never run simultaneously. - * The lookup function drm_ht_find_item_rcu may, however, run simultaneously - * with any of the manipulation functions as long as it's called from within - * an RCU read-locked section. - */ -#define drm_ht_insert_item_rcu drm_ht_insert_item -#define drm_ht_just_insert_please_rcu drm_ht_just_insert_please -#define drm_ht_remove_key_rcu drm_ht_remove_key -#define drm_ht_remove_item_rcu drm_ht_remove_item -#define drm_ht_find_item_rcu drm_ht_find_item - -#endif diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h index 58dc8d8cc907..0fc85418aad8 100644 --- a/include/drm/drm_legacy.h +++ b/include/drm/drm_legacy.h @@ -37,7 +37,6 @@ #include #include -#include struct drm_device; struct drm_driver; @@ -51,6 +50,20 @@ struct pci_driver; * you're doing it terribly wrong. */ +/* + * Hash-table Support + */ + +struct drm_hash_item { + struct hlist_node head; + unsigned long key; +}; + +struct drm_open_hash { + struct hlist_head *table; + u8 order; +}; + /** * DMA buffer. */ -- cgit v1.2.3 From d0c4e34db0b0a012352dad499a13738b4102f277 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 15 Nov 2021 13:01:46 +0100 Subject: drm/cma-helper: Move driver and file ops to the end of header Restructure the header file for CMA helpers by moving declarations for driver and file operations to the end of the file. No functional changes. Signed-off-by: Thomas Zimmermann Reviewed-by: Maxime Ripard Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/20211115120148.21766-2-tzimmermann@suse.de --- include/drm/drm_gem_cma_helper.h | 114 ++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index cd13508acbc1..e0fb7a0cf03f 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -32,77 +32,40 @@ struct drm_gem_cma_object { #define to_drm_gem_cma_obj(gem_obj) \ container_of(gem_obj, struct drm_gem_cma_object, base) -#ifndef CONFIG_MMU -#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ - .get_unmapped_area = drm_gem_cma_get_unmapped_area, -#else -#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS -#endif - -/** - * DEFINE_DRM_GEM_CMA_FOPS() - macro to generate file operations for CMA drivers - * @name: name for the generated structure - * - * This macro autogenerates a suitable &struct file_operations for CMA based - * drivers, which can be assigned to &drm_driver.fops. Note that this structure - * cannot be shared between drivers, because it contains a reference to the - * current module using THIS_MODULE. - * - * Note that the declaration is already marked as static - if you need a - * non-static version of this you're probably doing it wrong and will break the - * THIS_MODULE reference by accident. - */ -#define DEFINE_DRM_GEM_CMA_FOPS(name) \ - static const struct file_operations name = {\ - .owner = THIS_MODULE,\ - .open = drm_open,\ - .release = drm_release,\ - .unlocked_ioctl = drm_ioctl,\ - .compat_ioctl = drm_compat_ioctl,\ - .poll = drm_poll,\ - .read = drm_read,\ - .llseek = noop_llseek,\ - .mmap = drm_gem_mmap,\ - DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ - } - /* free GEM object */ void drm_gem_cma_free_object(struct drm_gem_object *gem_obj); -/* create memory region for DRM framebuffer */ -int drm_gem_cma_dumb_create_internal(struct drm_file *file_priv, - struct drm_device *drm, - struct drm_mode_create_dumb *args); - -/* create memory region for DRM framebuffer */ -int drm_gem_cma_dumb_create(struct drm_file *file_priv, - struct drm_device *drm, - struct drm_mode_create_dumb *args); - /* allocate physical memory */ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm, size_t size); extern const struct vm_operations_struct drm_gem_cma_vm_ops; -#ifndef CONFIG_MMU -unsigned long drm_gem_cma_get_unmapped_area(struct file *filp, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags); -#endif - void drm_gem_cma_print_info(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *obj); struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_object *obj); +int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); +int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); + +/* + * Driver ops + */ + +/* create memory region for DRM framebuffer */ +int drm_gem_cma_dumb_create_internal(struct drm_file *file_priv, + struct drm_device *drm, + struct drm_mode_create_dumb *args); + +/* create memory region for DRM framebuffer */ +int drm_gem_cma_dumb_create(struct drm_file *file_priv, + struct drm_device *drm, + struct drm_mode_create_dumb *args); + struct drm_gem_object * drm_gem_cma_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); -int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); -int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); /** * DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE - CMA GEM driver operations @@ -185,4 +148,47 @@ drm_gem_cma_prime_import_sg_table_vmap(struct drm_device *drm, struct dma_buf_attachment *attach, struct sg_table *sgt); +/* + * File ops + */ + +#ifndef CONFIG_MMU +unsigned long drm_gem_cma_get_unmapped_area(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags); +#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ + .get_unmapped_area = drm_gem_cma_get_unmapped_area, +#else +#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS +#endif + +/** + * DEFINE_DRM_GEM_CMA_FOPS() - macro to generate file operations for CMA drivers + * @name: name for the generated structure + * + * This macro autogenerates a suitable &struct file_operations for CMA based + * drivers, which can be assigned to &drm_driver.fops. Note that this structure + * cannot be shared between drivers, because it contains a reference to the + * current module using THIS_MODULE. + * + * Note that the declaration is already marked as static - if you need a + * non-static version of this you're probably doing it wrong and will break the + * THIS_MODULE reference by accident. + */ +#define DEFINE_DRM_GEM_CMA_FOPS(name) \ + static const struct file_operations name = {\ + .owner = THIS_MODULE,\ + .open = drm_open,\ + .release = drm_release,\ + .unlocked_ioctl = drm_ioctl,\ + .compat_ioctl = drm_compat_ioctl,\ + .poll = drm_poll,\ + .read = drm_read,\ + .llseek = noop_llseek,\ + .mmap = drm_gem_mmap,\ + DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ + } + #endif /* __DRM_GEM_CMA_HELPER_H__ */ -- cgit v1.2.3 From 05b1de51df077a2089e3d8ceec68aa687cff15db Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 15 Nov 2021 13:01:47 +0100 Subject: drm/cma-helper: Export dedicated wrappers for GEM object functions Wrap GEM CMA functions for struct drm_gem_object_funcs and update all callers. This will allow for an update of the public interfaces of the GEM CMA helper library. Signed-off-by: Thomas Zimmermann Reviewed-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20211115120148.21766-3-tzimmermann@suse.de --- include/drm/drm_gem_cma_helper.h | 78 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index e0fb7a0cf03f..56d2f9fdf9ac 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -48,6 +48,84 @@ struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_object *obj); int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +/* + * GEM object functions + */ + +/** + * drm_gem_cma_object_free - GEM object function for drm_gem_cma_free_object() + * @obj: GEM object to free + * + * This function wraps drm_gem_cma_free_object(). Drivers that employ the CMA helpers + * should use it as their &drm_gem_object_funcs.free handler. + */ +static inline void drm_gem_cma_object_free(struct drm_gem_object *obj) +{ + drm_gem_cma_free_object(obj); +} + +/** + * drm_gem_cma_object_print_info() - Print &drm_gem_cma_object info for debugfs + * @p: DRM printer + * @indent: Tab indentation level + * @obj: GEM object + * + * This function wraps drm_gem_cma_print_info(). Drivers that employ the CMA helpers + * should use this function as their &drm_gem_object_funcs.print_info handler. + */ +static inline void drm_gem_cma_object_print_info(struct drm_printer *p, unsigned int indent, + const struct drm_gem_object *obj) +{ + drm_gem_cma_print_info(p, indent, obj); +} + +/** + * drm_gem_cma_object_get_sg_table - GEM object function for drm_gem_cma_get_sg_table() + * @obj: GEM object + * + * This function wraps drm_gem_cma_get_sg_table(). Drivers that employ the CMA helpers should + * use it as their &drm_gem_object_funcs.get_sg_table handler. + * + * Returns: + * A pointer to the scatter/gather table of pinned pages or NULL on failure. + */ +static inline struct sg_table *drm_gem_cma_object_get_sg_table(struct drm_gem_object *obj) +{ + return drm_gem_cma_get_sg_table(obj); +} + +/* + * drm_gem_cma_object_vmap - GEM object function for drm_gem_cma_vmap() + * @obj: GEM object + * @map: Returns the kernel virtual address of the CMA GEM object's backing store. + * + * This function wraps drm_gem_cma_vmap(). Drivers that employ the CMA helpers should + * use it as their &drm_gem_object_funcs.vmap handler. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) +{ + return drm_gem_cma_vmap(obj, map); +} + +/** + * drm_gem_cma_object_mmap - GEM object function for drm_gem_cma_mmap() + * @obj: GEM object + * @vma: VMA for the area to be mapped + * + * This function wraps drm_gem_cma_mmap(). Drivers that employ the cma helpers should + * use it as their &drm_gem_object_funcs.mmap handler. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +static inline int drm_gem_cma_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + return drm_gem_cma_mmap(obj, vma); +} + /* * Driver ops */ -- cgit v1.2.3 From e580ea25c08d9e89593bcf80640e29108f0542cb Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 15 Nov 2021 13:01:48 +0100 Subject: drm/cma-helper: Pass GEM CMA object in public interfaces Change all GEM CMA object functions that receive a GEM object of type struct drm_gem_object to expect an object of type struct drm_gem_cma_object instead. This change reduces the number of upcasts from struct drm_gem_object by moving them into callers. The C compiler can now verify that the GEM CMA functions are called with the correct type. For consistency, the patch also renames drm_gem_cma_free_object to drm_gem_cma_free. It further updates documentation for a number of functions. Signed-off-by: Thomas Zimmermann Reviewed-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20211115120148.21766-4-tzimmermann@suse.de --- include/drm/drm_gem_cma_helper.h | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index 56d2f9fdf9ac..adb507a9dbf0 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -32,28 +32,23 @@ struct drm_gem_cma_object { #define to_drm_gem_cma_obj(gem_obj) \ container_of(gem_obj, struct drm_gem_cma_object, base) -/* free GEM object */ -void drm_gem_cma_free_object(struct drm_gem_object *gem_obj); - -/* allocate physical memory */ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm, size_t size); +void drm_gem_cma_free(struct drm_gem_cma_object *cma_obj); +void drm_gem_cma_print_info(const struct drm_gem_cma_object *cma_obj, + struct drm_printer *p, unsigned int indent); +struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_cma_object *cma_obj); +int drm_gem_cma_vmap(struct drm_gem_cma_object *cma_obj, struct dma_buf_map *map); +int drm_gem_cma_mmap(struct drm_gem_cma_object *cma_obj, struct vm_area_struct *vma); extern const struct vm_operations_struct drm_gem_cma_vm_ops; -void drm_gem_cma_print_info(struct drm_printer *p, unsigned int indent, - const struct drm_gem_object *obj); - -struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_object *obj); -int drm_gem_cma_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); -int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); - /* * GEM object functions */ /** - * drm_gem_cma_object_free - GEM object function for drm_gem_cma_free_object() + * drm_gem_cma_object_free - GEM object function for drm_gem_cma_free() * @obj: GEM object to free * * This function wraps drm_gem_cma_free_object(). Drivers that employ the CMA helpers @@ -61,7 +56,9 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); */ static inline void drm_gem_cma_object_free(struct drm_gem_object *obj) { - drm_gem_cma_free_object(obj); + struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); + + drm_gem_cma_free(cma_obj); } /** @@ -76,7 +73,9 @@ static inline void drm_gem_cma_object_free(struct drm_gem_object *obj) static inline void drm_gem_cma_object_print_info(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *obj) { - drm_gem_cma_print_info(p, indent, obj); + const struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); + + drm_gem_cma_print_info(cma_obj, p, indent); } /** @@ -91,7 +90,9 @@ static inline void drm_gem_cma_object_print_info(struct drm_printer *p, unsigned */ static inline struct sg_table *drm_gem_cma_object_get_sg_table(struct drm_gem_object *obj) { - return drm_gem_cma_get_sg_table(obj); + struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); + + return drm_gem_cma_get_sg_table(cma_obj); } /* @@ -107,7 +108,9 @@ static inline struct sg_table *drm_gem_cma_object_get_sg_table(struct drm_gem_ob */ static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) { - return drm_gem_cma_vmap(obj, map); + struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); + + return drm_gem_cma_vmap(cma_obj, map); } /** @@ -123,7 +126,9 @@ static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, struct dma */ static inline int drm_gem_cma_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { - return drm_gem_cma_mmap(obj, vma); + struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); + + return drm_gem_cma_mmap(cma_obj, vma); } /* -- cgit v1.2.3 From 4047b9db1aa7512a10ba3560a3f63821c8c40235 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Mon, 29 Nov 2021 01:28:54 +0530 Subject: net: stmmac: Add platform level debug register dump feature dwmac-qcom-ethqos currently exposes a mechanism to dump rgmii registers after the 'stmmac_dvr_probe()' returns. However with commit 5ec55823438e ("net: stmmac: add clocks management for gmac driver"), we now let 'pm_runtime_put()' disable the clocks before returning from 'stmmac_dvr_probe()'. This causes a crash when 'rgmii_dump()' register dumps are enabled, as the clocks are already off. Since other dwmac drivers (possible future users as well) might require a similar register dump feature, introduce a platform level callback to allow the same. This fixes the crash noticed while enabling rgmii_dump() dumps in dwmac-qcom-ethqos driver as well. It also allows future changes to keep a invoking the register dump callback from the correct place inside 'stmmac_dvr_probe()'. Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver") Cc: Joakim Zhang Cc: David S. Miller Signed-off-by: Bhupesh Sharma Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 89b8e208cd7b..24eea1b05ca2 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -233,6 +233,7 @@ struct plat_stmmacenet_data { int (*clks_config)(void *priv, bool enabled); int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, void *ctx); + void (*dump_debug_regs)(void *priv); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; -- cgit v1.2.3 From 5944b5abd8646e8c6ac6af2b55f87dede1dae898 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 30 Nov 2021 12:29:47 +0800 Subject: Bonding: add arp_missed_max option Currently, we use hard code number to verify if we are in the arp_interval timeslice. But some user may want to reduce/extend the verify timeslice. With the similar team option 'missed_max' the uers could change that number based on their own environment. Acked-by: Jay Vosburgh Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/net/bond_options.h | 1 + include/net/bonding.h | 1 + include/uapi/linux/if_link.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bond_options.h b/include/net/bond_options.h index e64833a674eb..dd75c071f67e 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -65,6 +65,7 @@ enum { BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_PEER_NOTIF_DELAY, BOND_OPT_LACP_ACTIVE, + BOND_OPT_MISSED_MAX, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 15e083e18f75..f6ae3a4baea4 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -121,6 +121,7 @@ struct bond_params { int xmit_policy; int miimon; u8 num_peer_notif; + u8 missed_max; int arp_interval; int arp_validate; int arp_all_targets; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index eebd3894fe89..4ac53b30b6dc 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -858,6 +858,7 @@ enum { IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, + IFLA_BOND_MISSED_MAX, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From 4c897cfc46a554a523343fc3296333c473a2fc52 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 30 Nov 2021 12:16:20 +0200 Subject: devlink: Simplify devlink resources unregister call The devlink_resources_unregister() used second parameter as an entry point for the recursive removal of devlink resources. None of the callers outside of devlink core needed to use this field, so let's remove it. As part of this removal, the "struct devlink_resource" was moved from .h to .c file as it is not possible to use in any place in the code except devlink.c. Signed-off-by: Leon Romanovsky Signed-off-by: David S. Miller --- include/net/devlink.h | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index e3c88fabd700..043fcec8b0aa 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -361,33 +361,6 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para typedef u64 devlink_resource_occ_get_t(void *priv); -/** - * struct devlink_resource - devlink resource - * @name: name of the resource - * @id: id, per devlink instance - * @size: size of the resource - * @size_new: updated size of the resource, reload is needed - * @size_valid: valid in case the total size of the resource is valid - * including its children - * @parent: parent resource - * @size_params: size parameters - * @list: parent list - * @resource_list: list of child resources - */ -struct devlink_resource { - const char *name; - u64 id; - u64 size; - u64 size_new; - bool size_valid; - struct devlink_resource *parent; - struct devlink_resource_size_params size_params; - struct list_head list; - struct list_head resource_list; - devlink_resource_occ_get_t *occ_get; - void *occ_get_priv; -}; - #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 #define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports" @@ -1571,8 +1544,7 @@ int devlink_resource_register(struct devlink *devlink, u64 resource_id, u64 parent_resource_id, const struct devlink_resource_size_params *size_params); -void devlink_resources_unregister(struct devlink *devlink, - struct devlink_resource *resource); +void devlink_resources_unregister(struct devlink *devlink); int devlink_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size); -- cgit v1.2.3 From 79364031c5b4365ca28ac0fa00acfab5bf465be1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 27 Nov 2021 17:32:00 +0100 Subject: bpf: Make sure bpf_disable_instrumentation() is safe vs preemption. The initial implementation of migrate_disable() for mainline was a wrapper around preempt_disable(). RT kernels substituted this with a real migrate disable implementation. Later on mainline gained true migrate disable support, but neither documentation nor affected code were updated. Remove stale comments claiming that migrate_disable() is PREEMPT_RT only. Don't use __this_cpu_inc() in the !PREEMPT_RT path because preemption is not disabled and the RMW operation can be preempted. Fixes: 74d862b682f51 ("sched: Make migrate_disable/enable() independent of RT") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211127163200.10466-3-bigeasy@linutronix.de --- include/linux/bpf.h | 16 ++-------------- include/linux/filter.h | 3 --- 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 84ff6ef49462..755f38e893be 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1353,28 +1353,16 @@ extern struct mutex bpf_stats_enabled_mutex; * kprobes, tracepoints) to prevent deadlocks on map operations as any of * these events can happen inside a region which holds a map bucket lock * and can deadlock on it. - * - * Use the preemption safe inc/dec variants on RT because migrate disable - * is preemptible on RT and preemption in the middle of the RMW operation - * might lead to inconsistent state. Use the raw variants for non RT - * kernels as migrate_disable() maps to preempt_disable() so the slightly - * more expensive save operation can be avoided. */ static inline void bpf_disable_instrumentation(void) { migrate_disable(); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_inc(bpf_prog_active); - else - __this_cpu_inc(bpf_prog_active); + this_cpu_inc(bpf_prog_active); } static inline void bpf_enable_instrumentation(void) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_dec(bpf_prog_active); - else - __this_cpu_dec(bpf_prog_active); + this_cpu_dec(bpf_prog_active); migrate_enable(); } diff --git a/include/linux/filter.h b/include/linux/filter.h index 24b7ed2677af..534f678ca50f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -640,9 +640,6 @@ static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void * This uses migrate_disable/enable() explicitly to document that the * invocation of a BPF program does not require reentrancy protection * against a BPF program which is invoked from a preempting task. - * - * For non RT enabled kernels migrate_disable/enable() maps to - * preempt_disable/enable(), i.e. it disables also preemption. */ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, const void *ctx) -- cgit v1.2.3 From 15fa179f3f45415696d376abc84e0098a9586b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Fri, 26 Nov 2021 15:03:53 +0100 Subject: ALSA: hda: Fill gaps in NHLT endpoint-interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two key operations missings are: endpoint presence-check and retrieval of matching endpoint hardware configuration (blob). Add operations for both use cases. Signed-off-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20211126140355.1042684-2-cezary.rojewski@intel.com Signed-off-by: Takashi Iwai --- include/sound/intel-nhlt.h | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index d0574805865f..089a760d36eb 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -10,6 +10,14 @@ #include +enum nhlt_link_type { + NHLT_LINK_HDA = 0, + NHLT_LINK_DSP = 1, + NHLT_LINK_DMIC = 2, + NHLT_LINK_SSP = 3, + NHLT_LINK_INVALID +}; + #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_INTEL_NHLT) struct wav_fmt { @@ -33,14 +41,6 @@ struct wav_fmt_ext { u8 sub_fmt[16]; } __packed; -enum nhlt_link_type { - NHLT_LINK_HDA = 0, - NHLT_LINK_DSP = 1, - NHLT_LINK_DMIC = 2, - NHLT_LINK_SSP = 3, - NHLT_LINK_INVALID -}; - enum nhlt_device_type { NHLT_DEVICE_BT = 0, NHLT_DEVICE_DMIC = 1, @@ -132,6 +132,12 @@ void intel_nhlt_free(struct nhlt_acpi_table *addr); int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt); +bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, u8 link_type); +struct nhlt_specific_cfg * +intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, + u32 bus_id, u8 link_type, u8 vbps, u8 bps, + u8 num_ch, u32 rate, u8 dir, u8 dev_type); + #else struct nhlt_acpi_table; @@ -150,6 +156,21 @@ static inline int intel_nhlt_get_dmic_geo(struct device *dev, { return 0; } + +static inline bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, + u8 link_type) +{ + return false; +} + +static inline struct nhlt_specific_cfg * +intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, + u32 bus_id, u8 link_type, u8 vbps, u8 bps, + u8 num_ch, u32 rate, u8 dir, u8 dev_type) +{ + return NULL; +} + #endif #endif -- cgit v1.2.3 From e6f2dd0f80674e9d5960337b3e9c2a242441b326 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 29 Nov 2021 19:06:19 -0800 Subject: bpf: Add bpf_loop helper This patch adds the kernel-side and API changes for a new helper function, bpf_loop: long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags); where long (*callback_fn)(u32 index, void *ctx); bpf_loop invokes the "callback_fn" **nr_loops** times or until the callback_fn returns 1. The callback_fn can only return 0 or 1, and this is enforced by the verifier. The callback_fn index is zero-indexed. A few things to please note: ~ The "u64 flags" parameter is currently unused but is included in case a future use case for it arises. ~ In the kernel-side implementation of bpf_loop (kernel/bpf/bpf_iter.c), bpf_callback_t is used as the callback function cast. ~ A program can have nested bpf_loop calls but the program must still adhere to the verifier constraint of its stack depth (the stack depth cannot exceed MAX_BPF_STACK)) ~ Recursive callback_fns do not pass the verifier, due to the call stack for these being too deep. ~ The next patch will include the tests and benchmark Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211130030622.4131246-2-joannekoong@fb.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc7a0c36e7df..cad0829710be 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; +extern const struct bpf_func_proto bpf_loop_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a69e4b04ffeb..211b43afd0fb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4957,6 +4957,30 @@ union bpf_attr { * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. * **-EBUSY** if failed to try lock mmap_lock. * **-EINVAL** for invalid **flags**. + * + * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_loops**, call **callback_fn** function + * with **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. Currently, nr_loops is + * limited to 1 << 23 (~8 million) loops. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the loop. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * loop. If return value is 1, the helper will skip the rest of + * the loops and return. Other return values are not used now, + * and will be rejected by the verifier. + * + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5140,6 +5164,7 @@ union bpf_attr { FN(skc_to_unix_sock), \ FN(kallsyms_lookup_name), \ FN(find_vma), \ + FN(loop), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 7ad639840acf2800b5f387c495795f995a67a329 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 29 Nov 2021 13:06:43 +0000 Subject: thread_info: Add helpers to snapshot thread flags In there are helpers to manipulate individual thread flags, but where code wants to check several flags at once, it must open code reading current_thread_info()->flags and operating on a snapshot. As some flags can be set remotely it's necessary to use READ_ONCE() to get a consistent snapshot even when IRQs are disabled, but some code forgets to do this. Generally this is unlike to cause a problem in practice, but it is somewhat unsound, and KCSAN will legitimately warn that there is a data race. To make it easier to do the right thing, and to highlight that concurrent modification is possible, add new helpers to snapshot the flags, which should be used in preference to plain reads. Subsequent patches will move existing code to use the new helpers. Signed-off-by: Mark Rutland Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Marco Elver Acked-by: Paul E. McKenney Cc: Boqun Feng Cc: Dmitry Vyukov Cc: Peter Zijlstra Cc: Will Deacon Link: https://lore.kernel.org/r/20211129130653.2037928-2-mark.rutland@arm.com --- include/linux/thread_info.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ad0c4e041030..73a6f34b3847 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -118,6 +118,15 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) return test_bit(flag, (unsigned long *)&ti->flags); } +/* + * This may be used in noinstr code, and needs to be __always_inline to prevent + * inadvertent instrumentation. + */ +static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti) +{ + return READ_ONCE(ti->flags); +} + #define set_thread_flag(flag) \ set_ti_thread_flag(current_thread_info(), flag) #define clear_thread_flag(flag) \ @@ -130,6 +139,11 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) test_and_clear_ti_thread_flag(current_thread_info(), flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) +#define read_thread_flags() \ + read_ti_thread_flags(current_thread_info()) + +#define read_task_thread_flags(t) \ + read_ti_thread_flags(task_thread_info(t)) #ifdef CONFIG_GENERIC_ENTRY #define set_syscall_work(fl) \ -- cgit v1.2.3 From 6ce895128b3bff738fe8d9dd74747a03e319e466 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 29 Nov 2021 13:06:44 +0000 Subject: entry: Snapshot thread flags Some thread flags can be set remotely, and so even when IRQs are disabled, the flags can change under our feet. Generally this is unlikely to cause a problem in practice, but it is somewhat unsound, and KCSAN will legitimately warn that there is a data race. To avoid such issues, a snapshot of the flags has to be taken prior to using them. Some places already use READ_ONCE() for that, others do not. Convert them all to the new flag accessor helpers. Signed-off-by: Mark Rutland Signed-off-by: Thomas Gleixner Acked-by: Paul E. McKenney Link: https://lore.kernel.org/r/20211129130653.2037928-3-mark.rutland@arm.com --- include/linux/entry-kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 0d7865a0731c..07c878d6e323 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -75,7 +75,7 @@ static inline void xfer_to_guest_mode_prepare(void) */ static inline bool __xfer_to_guest_mode_work_pending(void) { - unsigned long ti_work = READ_ONCE(current_thread_info()->flags); + unsigned long ti_work = read_thread_flags(); return !!(ti_work & XFER_TO_GUEST_MODE_WORK); } -- cgit v1.2.3 From 4946f15e8c334840bf277a0bf924371eae120fcd Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Tue, 30 Nov 2021 22:40:43 +0100 Subject: genirq/generic_chip: Constify irq_generic_chip_ops The only usage of irq_generic_chip_ops is to pass its address to irq_domain_add_linear() which takes a pointer to const struct irq_domain_ops. Make it const to allow the compiler to put it in read-only memory. [ tglx: Fixed subject prefix ] Signed-off-by: Rikard Falkeborn Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20211130214043.1257585-1-rikard.falkeborn@gmail.com --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 553da4899f55..d476405802e9 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -131,7 +131,7 @@ struct irq_domain_ops { #endif }; -extern struct irq_domain_ops irq_generic_chip_ops; +extern const struct irq_domain_ops irq_generic_chip_ops; struct irq_domain_chip_generic; -- cgit v1.2.3 From 24ba53017e188e031f9cb8b290286fad52d2af00 Mon Sep 17 00:00:00 2001 From: Chun-Hung Tseng Date: Wed, 15 Sep 2021 17:02:18 +0800 Subject: rcu: Replace ________p1 and _________p1 with __UNIQUE_ID(rcu) This commit replaces both ________p1 and _________p1 with __UNIQUE_ID(rcu), and also adjusts the callers of the affected macros. __UNIQUE_ID(rcu) will generate unique variable names during compilation, which eliminates the need of ________p1 and _________p1 (both having 4 occurrences prior to the code change). This also avoids the variable name shadowing issue, or at least makes those wishing to cause shadowing problems work much harder to do so. The same idea is used for the min/max macros (commit 589a978 and commit e9092d0). Signed-off-by: Jim Huang Signed-off-by: Chun-Hung Tseng Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 50 +++++++++++++++++++++++++++--------------------- include/linux/srcu.h | 3 ++- 2 files changed, 30 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5e0beb5c5659..88b42eb46406 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -364,6 +364,12 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ +#define __unrcu_pointer(p, local) \ +({ \ + typeof(*p) *local = (typeof(*p) *__force)(p); \ + rcu_check_sparse(p, __rcu); \ + ((typeof(*p) __force __kernel *)(local)); \ +}) /** * unrcu_pointer - mark a pointer as not being RCU protected * @p: pointer needing to lose its __rcu property @@ -371,39 +377,35 @@ static inline void rcu_preempt_sleep_check(void) { } * Converts @p from an __rcu pointer to a __kernel pointer. * This allows an __rcu pointer to be used with xchg() and friends. */ -#define unrcu_pointer(p) \ -({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)(p); \ - rcu_check_sparse(p, __rcu); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ -}) +#define unrcu_pointer(p) __unrcu_pointer(p, __UNIQUE_ID(rcu)) -#define __rcu_access_pointer(p, space) \ +#define __rcu_access_pointer(p, local, space) \ ({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ + typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ rcu_check_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ + ((typeof(*p) __force __kernel *)(local)); \ }) -#define __rcu_dereference_check(p, c, space) \ +#define __rcu_dereference_check(p, local, c, space) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ + typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_check_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(________p1)); \ + ((typeof(*p) __force __kernel *)(local)); \ }) -#define __rcu_dereference_protected(p, c, space) \ +#define __rcu_dereference_protected(p, local, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) -#define rcu_dereference_raw(p) \ +#define __rcu_dereference_raw(p, local) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(p) ________p1 = READ_ONCE(p); \ - ((typeof(*p) __force __kernel *)(________p1)); \ + typeof(p) local = READ_ONCE(p); \ + ((typeof(*p) __force __kernel *)(local)); \ }) +#define rcu_dereference_raw(p) __rcu_dereference_raw(p, __UNIQUE_ID(rcu)) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable @@ -490,7 +492,7 @@ do { \ * when tearing down multi-linked structures after a grace period * has elapsed. */ -#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) +#define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) /** * rcu_dereference_check() - rcu_dereference with debug checking @@ -526,7 +528,8 @@ do { \ * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking @@ -541,7 +544,8 @@ do { \ * rcu_read_lock() but also rcu_read_lock_bh() into account. */ #define rcu_dereference_bh_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking @@ -556,7 +560,8 @@ do { \ * only rcu_read_lock() but also rcu_read_lock_sched() into account. */ #define rcu_dereference_sched_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_sched_held(), \ __rcu) /* @@ -566,7 +571,8 @@ do { \ * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ -#define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu) +#define rcu_dereference_raw_check(p) \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), 1, __rcu) /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented @@ -585,7 +591,7 @@ do { \ * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ - __rcu_dereference_protected((p), (c), __rcu) + __rcu_dereference_protected((p), __UNIQUE_ID(rcu), (c), __rcu) /** diff --git a/include/linux/srcu.h b/include/linux/srcu.h index e6011a9975af..01226e4d960a 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -117,7 +117,8 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * lockdep_is_held() calls. */ #define srcu_dereference_check(p, ssp, c) \ - __rcu_dereference_check((p), (c) || srcu_read_lock_held(ssp), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || srcu_read_lock_held(ssp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing -- cgit v1.2.3 From 2407a64f8045552203ee5cb9904ce75ce2fceef4 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 28 Sep 2021 08:21:28 +0800 Subject: rcu: in_irq() cleanup This commit replaces the obsolete and ambiguous macro in_irq() with its shiny new in_hardirq() equivalent. Signed-off-by: Changbin Du Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 9be015305f9f..858f4d429946 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -85,7 +85,7 @@ static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ - (is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq()) + (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { -- cgit v1.2.3 From 502e82b91361955c66c8453b5b7a905b0b5bd5a1 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 7 Nov 2021 17:21:45 +0200 Subject: net/mlx5: Fix access to a non-supported register Validate MRTC register is supported before triggering a delayed work which accesses it. Fixes: 5a1023deeed0 ("net/mlx5: Add periodic update of host time to firmware") Signed-off-by: Aya Levin Reviewed-by: Gal Pressman Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3636df90899a..fbaab440a484 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9698,7 +9698,10 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_84_to_68[0x11]; u8 tracer_registers[0x4]; - u8 regs_63_to_32[0x20]; + u8 regs_63_to_46[0x12]; + u8 mrtc[0x1]; + u8 regs_44_to_32[0xd]; + u8 regs_31_to_0[0x20]; }; -- cgit v1.2.3 From af3bf054661fb11497a7f712ece8b838521227a4 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 1 Dec 2021 01:17:36 +0000 Subject: cgroup: fix a typo in comment In commit 8699b7762a62 ("cgroup: s/child_subsys_mask/subtree_ss_mask/"), we rename child_subsys_mask to subtree_ss_mask. While it missed to rename this in comment. Signed-off-by: Wei Yang Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index db2e147e069f..bb1e79791ed5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -413,7 +413,7 @@ struct cgroup { /* * The bitmask of subsystems enabled on the child cgroups. * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_ss_mask is the effective + * "cgroup.subtree_control" while ->subtree_ss_mask is the effective * one which may have more subsystems enabled. Controller knobs * are made available iff it's enabled in ->subtree_control. */ -- cgit v1.2.3 From 2696f9010d21aee60be06b2135806e11c79ded8b Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 10 Nov 2021 09:50:34 -0500 Subject: drm/ttm: Clarify that the TTM_PL_SYSTEM is under TTMs control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TTM takes full control over TTM_PL_SYSTEM placed buffers. This makes driver internal usage of TTM_PL_SYSTEM prone to errors because it requires the drivers to manually handle all interactions between TTM which can swap out those buffers whenever it thinks it's the right thing to do and driver. CPU buffers which need to be fenced and shared with accelerators should be placed in driver specific placements that can explicitly handle CPU/accelerator buffer fencing. Currently, apart, from things silently failing nothing is enforcing that requirement which means that it's easy for drivers and new developers to get this wrong. To avoid the confusion we can document this requirement and clarify the solution. This came up during a discussion on dri-devel: https://lore.kernel.org/dri-devel/232f45e9-8748-1243-09bf-56763e6668b3@amd.com Signed-off-by: Zack Rusin Cc: Christian König Cc: Thomas Hellström Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20211110145034.487512-1-zackr@vmware.com --- include/drm/ttm/ttm_placement.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h index 76d1b9119a2b..8074d0f6cae5 100644 --- a/include/drm/ttm/ttm_placement.h +++ b/include/drm/ttm/ttm_placement.h @@ -35,6 +35,17 @@ /* * Memory regions for data placement. + * + * Buffers placed in TTM_PL_SYSTEM are considered under TTMs control and can + * be swapped out whenever TTMs thinks it is a good idea. + * In cases where drivers would like to use TTM_PL_SYSTEM as a valid + * placement they need to be able to handle the issues that arise due to the + * above manually. + * + * For BO's which reside in system memory but for which the accelerator + * requires direct access (i.e. their usage needs to be synchronized + * between the CPU and accelerator via fences) a new, driver private + * placement that can handle such scenarios is a good idea. */ #define TTM_PL_SYSTEM 0 -- cgit v1.2.3 From e14da77113bb890d7bf9e5d17031bdd476a7ce5e Mon Sep 17 00:00:00 2001 From: William Kucharski Date: Wed, 1 Dec 2021 09:56:58 -0700 Subject: cgroup: Trace event cgroup id fields should be u64 Various trace event fields that store cgroup IDs were declared as ints, but cgroup_id(() returns a u64 and the structures and associated TP_printk() calls were not updated to reflect this. Fixes: 743210386c03 ("cgroup: use cgrp->kn->id as the cgroup ID") Signed-off-by: William Kucharski Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Tejun Heo --- include/trace/events/cgroup.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h index 7f42a3de59e6..dd7d7c9efecd 100644 --- a/include/trace/events/cgroup.h +++ b/include/trace/events/cgroup.h @@ -59,8 +59,8 @@ DECLARE_EVENT_CLASS(cgroup, TP_STRUCT__entry( __field( int, root ) - __field( int, id ) __field( int, level ) + __field( u64, id ) __string( path, path ) ), @@ -71,7 +71,7 @@ DECLARE_EVENT_CLASS(cgroup, __assign_str(path, path); ), - TP_printk("root=%d id=%d level=%d path=%s", + TP_printk("root=%d id=%llu level=%d path=%s", __entry->root, __entry->id, __entry->level, __get_str(path)) ); @@ -126,8 +126,8 @@ DECLARE_EVENT_CLASS(cgroup_migrate, TP_STRUCT__entry( __field( int, dst_root ) - __field( int, dst_id ) __field( int, dst_level ) + __field( u64, dst_id ) __field( int, pid ) __string( dst_path, path ) __string( comm, task->comm ) @@ -142,7 +142,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate, __assign_str(comm, task->comm); ), - TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s", + TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s", __entry->dst_root, __entry->dst_id, __entry->dst_level, __get_str(dst_path), __entry->pid, __get_str(comm)) ); @@ -171,8 +171,8 @@ DECLARE_EVENT_CLASS(cgroup_event, TP_STRUCT__entry( __field( int, root ) - __field( int, id ) __field( int, level ) + __field( u64, id ) __string( path, path ) __field( int, val ) ), @@ -185,7 +185,7 @@ DECLARE_EVENT_CLASS(cgroup_event, __entry->val = val; ), - TP_printk("root=%d id=%d level=%d path=%s val=%d", + TP_printk("root=%d id=%llu level=%d path=%s val=%d", __entry->root, __entry->id, __entry->level, __get_str(path), __entry->val) ); -- cgit v1.2.3 From 1a68b346a2c9969c05e80a3b99a9ab160b5655c0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 22 Nov 2021 18:05:31 +0100 Subject: ACPI: Change acpi_device_always_present() into acpi_device_override_status() Currently, acpi_bus_get_status() calls acpi_device_always_present() to allow platform quirks to override the _STA return to report that a device is present (status = ACPI_STA_DEFAULT) independent of the _STA return. In some cases it might also be useful to have the opposite functionality and have a platform quirk which marks a device as not present (status = 0) to work around ACPI table bugs. Change acpi_device_always_present() into a more generic acpi_device_override_status() function to allow this. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 480f9207a4c6..d6fe27b695c3 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -613,9 +613,10 @@ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); #ifdef CONFIG_X86 -bool acpi_device_always_present(struct acpi_device *adev); +bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status); #else -static inline bool acpi_device_always_present(struct acpi_device *adev) +static inline bool acpi_device_override_status(struct acpi_device *adev, + unsigned long long *status) { return false; } -- cgit v1.2.3 From 6bbfa44116689469267f1a6e3d233b52114139d2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 1 Dec 2021 23:45:50 +0900 Subject: kprobes: Limit max data_size of the kretprobe instances The 'kprobe::data_size' is unsigned, thus it can not be negative. But if user sets it enough big number (e.g. (size_t)-8), the result of 'data_size + sizeof(struct kretprobe_instance)' becomes smaller than sizeof(struct kretprobe_instance) or zero. In result, the kretprobe_instance are allocated without enough memory, and kretprobe accesses outside of allocated memory. To avoid this issue, introduce a max limitation of the kretprobe::data_size. 4KB per instance should be OK. Link: https://lkml.kernel.org/r/163836995040.432120.10322772773821182925.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: f47cd9b553aa ("kprobes: kretprobe user entry-handler") Reported-by: zhangyue Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/kprobes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e974caf39d3e..8c8f7a4d93af 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -153,6 +153,8 @@ struct kretprobe { struct kretprobe_holder *rph; }; +#define KRETPROBE_MAX_DATA_SIZE 4096 + struct kretprobe_instance { union { struct freelist_node freelist; -- cgit v1.2.3 From 072eea6c22b2af680c3949e64f9adde278c71e68 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 30 Nov 2021 13:10:01 +0000 Subject: net: dsa: replace phylink_get_interfaces() with phylink_get_caps() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phylink needs slightly more information than phylink_get_interfaces() allows us to get from the DSA drivers - we need the MAC capabilities. Replace the phylink_get_interfaces() method with phylink_get_caps() to allow DSA drivers to fill in the phylink_config MAC capabilities field as well. Signed-off-by: Russell King (Oracle) Reviewed-by: Marek Behún Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index eff5c44ba377..8ca9d50cbbc2 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -645,8 +645,8 @@ struct dsa_switch_ops { /* * PHYLINK integration */ - void (*phylink_get_interfaces)(struct dsa_switch *ds, int port, - unsigned long *supported_interfaces); + void (*phylink_get_caps)(struct dsa_switch *ds, int port, + struct phylink_config *config); void (*phylink_validate)(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state); -- cgit v1.2.3 From ce8299b6f76f28326fedce2b4da90888bd97eab2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Nov 2021 19:32:46 -0800 Subject: Revert "net: snmp: add statistics for tcp small queue check" This reverts commit aeeecb889165617a841e939117f9a8095d0e7d80. The new SNMP variable (TCPSmallQueueFailure) can be incremented for good reasons, even on a 100Gbit single TCP_STREAM flow. If we really wanted to ease driver debugging [1], this would require something more sophisticated. [1] Usually, if a driver is delaying TX completions too much, this can lead to stalls in TCP output. Various work arounds have been used in the past, like skb_orphan() in ndo_start_xmit(). Signed-off-by: Eric Dumazet Cc: Menglong Dong Link: https://lore.kernel.org/r/20211201033246.2826224-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/snmp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index e32ec6932e82..904909d020e2 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -292,7 +292,6 @@ enum LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ - LINUX_MIB_TCPSMALLQUEUEFAILURE, /* TCPSmallQueueFailure */ __LINUX_MIB_MAX }; -- cgit v1.2.3 From 7a10d8c810cfad3e79372d7d1c77899d86cd6662 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Nov 2021 09:01:55 -0800 Subject: net: annotate data-races on txq->xmit_lock_owner syzbot found that __dev_queue_xmit() is reading txq->xmit_lock_owner without annotations. No serious issue there, let's document what is happening there. BUG: KCSAN: data-race in __dev_queue_xmit / __dev_queue_xmit write to 0xffff888139d09484 of 4 bytes by interrupt on cpu 0: __netif_tx_unlock include/linux/netdevice.h:4437 [inline] __dev_queue_xmit+0x948/0xf70 net/core/dev.c:4229 dev_queue_xmit_accel+0x19/0x20 net/core/dev.c:4265 macvlan_queue_xmit drivers/net/macvlan.c:543 [inline] macvlan_start_xmit+0x2b3/0x3d0 drivers/net/macvlan.c:567 __netdev_start_xmit include/linux/netdevice.h:4987 [inline] netdev_start_xmit include/linux/netdevice.h:5001 [inline] xmit_one+0x105/0x2f0 net/core/dev.c:3590 dev_hard_start_xmit+0x72/0x120 net/core/dev.c:3606 sch_direct_xmit+0x1b2/0x7c0 net/sched/sch_generic.c:342 __dev_xmit_skb+0x83d/0x1370 net/core/dev.c:3817 __dev_queue_xmit+0x590/0xf70 net/core/dev.c:4194 dev_queue_xmit+0x13/0x20 net/core/dev.c:4259 neigh_hh_output include/net/neighbour.h:511 [inline] neigh_output include/net/neighbour.h:525 [inline] ip6_finish_output2+0x995/0xbb0 net/ipv6/ip6_output.c:126 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline] ip6_finish_output+0x444/0x4c0 net/ipv6/ip6_output.c:201 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x10e/0x210 net/ipv6/ip6_output.c:224 dst_output include/net/dst.h:450 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ndisc_send_skb+0x486/0x610 net/ipv6/ndisc.c:508 ndisc_send_rs+0x3b0/0x3e0 net/ipv6/ndisc.c:702 addrconf_rs_timer+0x370/0x540 net/ipv6/addrconf.c:3898 call_timer_fn+0x2e/0x240 kernel/time/timer.c:1421 expire_timers+0x116/0x240 kernel/time/timer.c:1466 __run_timers+0x368/0x410 kernel/time/timer.c:1734 run_timer_softirq+0x2e/0x60 kernel/time/timer.c:1747 __do_softirq+0x158/0x2de kernel/softirq.c:558 __irq_exit_rcu kernel/softirq.c:636 [inline] irq_exit_rcu+0x37/0x70 kernel/softirq.c:648 sysvec_apic_timer_interrupt+0x3e/0xb0 arch/x86/kernel/apic/apic.c:1097 asm_sysvec_apic_timer_interrupt+0x12/0x20 read to 0xffff888139d09484 of 4 bytes by interrupt on cpu 1: __dev_queue_xmit+0x5e3/0xf70 net/core/dev.c:4213 dev_queue_xmit_accel+0x19/0x20 net/core/dev.c:4265 macvlan_queue_xmit drivers/net/macvlan.c:543 [inline] macvlan_start_xmit+0x2b3/0x3d0 drivers/net/macvlan.c:567 __netdev_start_xmit include/linux/netdevice.h:4987 [inline] netdev_start_xmit include/linux/netdevice.h:5001 [inline] xmit_one+0x105/0x2f0 net/core/dev.c:3590 dev_hard_start_xmit+0x72/0x120 net/core/dev.c:3606 sch_direct_xmit+0x1b2/0x7c0 net/sched/sch_generic.c:342 __dev_xmit_skb+0x83d/0x1370 net/core/dev.c:3817 __dev_queue_xmit+0x590/0xf70 net/core/dev.c:4194 dev_queue_xmit+0x13/0x20 net/core/dev.c:4259 neigh_resolve_output+0x3db/0x410 net/core/neighbour.c:1523 neigh_output include/net/neighbour.h:527 [inline] ip6_finish_output2+0x9be/0xbb0 net/ipv6/ip6_output.c:126 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline] ip6_finish_output+0x444/0x4c0 net/ipv6/ip6_output.c:201 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x10e/0x210 net/ipv6/ip6_output.c:224 dst_output include/net/dst.h:450 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ndisc_send_skb+0x486/0x610 net/ipv6/ndisc.c:508 ndisc_send_rs+0x3b0/0x3e0 net/ipv6/ndisc.c:702 addrconf_rs_timer+0x370/0x540 net/ipv6/addrconf.c:3898 call_timer_fn+0x2e/0x240 kernel/time/timer.c:1421 expire_timers+0x116/0x240 kernel/time/timer.c:1466 __run_timers+0x368/0x410 kernel/time/timer.c:1734 run_timer_softirq+0x2e/0x60 kernel/time/timer.c:1747 __do_softirq+0x158/0x2de kernel/softirq.c:558 __irq_exit_rcu kernel/softirq.c:636 [inline] irq_exit_rcu+0x37/0x70 kernel/softirq.c:648 sysvec_apic_timer_interrupt+0x8d/0xb0 arch/x86/kernel/apic/apic.c:1097 asm_sysvec_apic_timer_interrupt+0x12/0x20 kcsan_setup_watchpoint+0x94/0x420 kernel/kcsan/core.c:443 folio_test_anon include/linux/page-flags.h:581 [inline] PageAnon include/linux/page-flags.h:586 [inline] zap_pte_range+0x5ac/0x10e0 mm/memory.c:1347 zap_pmd_range mm/memory.c:1467 [inline] zap_pud_range mm/memory.c:1496 [inline] zap_p4d_range mm/memory.c:1517 [inline] unmap_page_range+0x2dc/0x3d0 mm/memory.c:1538 unmap_single_vma+0x157/0x210 mm/memory.c:1583 unmap_vmas+0xd0/0x180 mm/memory.c:1615 exit_mmap+0x23d/0x470 mm/mmap.c:3170 __mmput+0x27/0x1b0 kernel/fork.c:1113 mmput+0x3d/0x50 kernel/fork.c:1134 exit_mm+0xdb/0x170 kernel/exit.c:507 do_exit+0x608/0x17a0 kernel/exit.c:819 do_group_exit+0xce/0x180 kernel/exit.c:929 get_signal+0xfc3/0x1550 kernel/signal.c:2852 arch_do_signal_or_restart+0x8c/0x2e0 arch/x86/kernel/signal.c:868 handle_signal_work kernel/entry/common.c:148 [inline] exit_to_user_mode_loop kernel/entry/common.c:172 [inline] exit_to_user_mode_prepare+0x113/0x190 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:300 do_syscall_64+0x50/0xd0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000000 -> 0xffffffff Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 28712 Comm: syz-executor.0 Tainted: G W 5.16.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20211130170155.2331929-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ec42495a43a..be5cb3360b94 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4404,7 +4404,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - txq->xmit_lock_owner = cpu; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, cpu); } static inline bool __netif_tx_acquire(struct netdev_queue *txq) @@ -4421,26 +4422,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } static inline bool __netif_tx_trylock(struct netdev_queue *txq) { bool ok = spin_trylock(&txq->_xmit_lock); - if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); + + if (likely(ok)) { + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); + } return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } -- cgit v1.2.3 From a37a0ee4d25c152a087c5e913b76f207eaebdb54 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Nov 2021 10:29:39 -0800 Subject: net: avoid uninit-value from tcp_conn_request A recent change triggers a KMSAN warning, because request sockets do not initialize @sk_rx_queue_mapping field. Add sk_rx_queue_update() helper to make our intent clear. BUG: KMSAN: uninit-value in sk_rx_queue_set include/net/sock.h:1922 [inline] BUG: KMSAN: uninit-value in tcp_conn_request+0x3bcc/0x4dc0 net/ipv4/tcp_input.c:6922 sk_rx_queue_set include/net/sock.h:1922 [inline] tcp_conn_request+0x3bcc/0x4dc0 net/ipv4/tcp_input.c:6922 tcp_v4_conn_request+0x218/0x2a0 net/ipv4/tcp_ipv4.c:1528 tcp_rcv_state_process+0x2c5/0x3290 net/ipv4/tcp_input.c:6406 tcp_v4_do_rcv+0xb4e/0x1330 net/ipv4/tcp_ipv4.c:1738 tcp_v4_rcv+0x468d/0x4ed0 net/ipv4/tcp_ipv4.c:2100 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 invoke_softirq+0xa4/0x130 kernel/softirq.c:432 __irq_exit_rcu kernel/softirq.c:636 [inline] irq_exit_rcu+0x76/0x130 kernel/softirq.c:648 common_interrupt+0xb6/0xd0 arch/x86/kernel/irq.c:240 asm_common_interrupt+0x1e/0x40 smap_restore arch/x86/include/asm/smap.h:67 [inline] get_shadow_origin_ptr mm/kmsan/instrumentation.c:31 [inline] __msan_metadata_ptr_for_load_1+0x28/0x30 mm/kmsan/instrumentation.c:63 tomoyo_check_acl+0x1b0/0x630 security/tomoyo/domain.c:173 tomoyo_path_permission security/tomoyo/file.c:586 [inline] tomoyo_check_open_permission+0x61f/0xe10 security/tomoyo/file.c:777 tomoyo_file_open+0x24f/0x2d0 security/tomoyo/tomoyo.c:311 security_file_open+0xb1/0x1f0 security/security.c:1635 do_dentry_open+0x4e4/0x1bf0 fs/open.c:809 vfs_open+0xaf/0xe0 fs/open.c:957 do_open fs/namei.c:3426 [inline] path_openat+0x52f1/0x5dd0 fs/namei.c:3559 do_filp_open+0x306/0x760 fs/namei.c:3586 do_sys_openat2+0x263/0x8f0 fs/open.c:1212 do_sys_open fs/open.c:1228 [inline] __do_sys_open fs/open.c:1236 [inline] __se_sys_open fs/open.c:1232 [inline] __x64_sys_open+0x314/0x380 fs/open.c:1232 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was created at: __alloc_pages+0xbc7/0x10a0 mm/page_alloc.c:5409 alloc_pages+0x8a5/0xb80 alloc_slab_page mm/slub.c:1810 [inline] allocate_slab+0x287/0x1c20 mm/slub.c:1947 new_slab mm/slub.c:2010 [inline] ___slab_alloc+0xbdf/0x1e90 mm/slub.c:3039 __slab_alloc mm/slub.c:3126 [inline] slab_alloc_node mm/slub.c:3217 [inline] slab_alloc mm/slub.c:3259 [inline] kmem_cache_alloc+0xbb3/0x11c0 mm/slub.c:3264 reqsk_alloc include/net/request_sock.h:91 [inline] inet_reqsk_alloc+0xaf/0x8b0 net/ipv4/tcp_input.c:6712 tcp_conn_request+0x910/0x4dc0 net/ipv4/tcp_input.c:6852 tcp_v4_conn_request+0x218/0x2a0 net/ipv4/tcp_ipv4.c:1528 tcp_rcv_state_process+0x2c5/0x3290 net/ipv4/tcp_input.c:6406 tcp_v4_do_rcv+0xb4e/0x1330 net/ipv4/tcp_ipv4.c:1738 tcp_v4_rcv+0x468d/0x4ed0 net/ipv4/tcp_ipv4.c:2100 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 Fixes: 342159ee394d ("net: avoid dirtying sk->sk_rx_queue_mapping") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20211130182939.2584764-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/busy_poll.h | 2 +- include/net/sock.h | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 4202c609bb0b..7994455ec714 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -133,7 +133,7 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id)) WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif - sk_rx_queue_set(sk, skb); + sk_rx_queue_update(sk, skb); } static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id) diff --git a/include/net/sock.h b/include/net/sock.h index 715cdb4b2b79..bea21ff70e74 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1913,18 +1913,31 @@ static inline int sk_tx_queue_get(const struct sock *sk) return -1; } -static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) +static inline void __sk_rx_queue_set(struct sock *sk, + const struct sk_buff *skb, + bool force_set) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING if (skb_rx_queue_recorded(skb)) { u16 rx_queue = skb_get_rx_queue(skb); - if (unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue)) + if (force_set || + unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue)) WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue); } #endif } +static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) +{ + __sk_rx_queue_set(sk, skb, true); +} + +static inline void sk_rx_queue_update(struct sock *sk, const struct sk_buff *skb) +{ + __sk_rx_queue_set(sk, skb, false); +} + static inline void sk_rx_queue_clear(struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING -- cgit v1.2.3 From 57b2b72ac1fc5d55cf3b13207942c109f1a65cb5 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 29 Nov 2021 09:57:59 -0700 Subject: mm, slab: Remove compiler check in __kmalloc_index The minimum supported version of LLVM has been raised to 11.0.0, meaning this check is always true, so it can be dropped. Signed-off-by: Nathan Chancellor Reviewed-by: Miguel Ojeda Reviewed-by: Mark Brown Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- include/linux/slab.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 181045148b06..d3fb5ac71c24 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -411,8 +411,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size, if (size <= 16 * 1024 * 1024) return 24; if (size <= 32 * 1024 * 1024) return 25; - if ((IS_ENABLED(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 110000) - && !IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant) + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant) BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()"); else BUG(); -- cgit v1.2.3 From 4ff22f487f8c26b99cbe1678344595734c001a39 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 30 Nov 2021 10:52:55 +0100 Subject: drm: Return error codes from struct drm_driver.gem_create_object GEM helper libraries use struct drm_driver.gem_create_object to let drivers override GEM object allocation. On failure, the call returns NULL. Change the semantics to make the calls return a pointer-encoded error. This aligns the callback with its callers. Fixes the ingenic driver, which already returns an error pointer. Also update the callers to handle the involved types more strictly. Signed-off-by: Thomas Zimmermann Reviewed-by: Steven Price Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20211130095255.26710-1-tzimmermann@suse.de --- include/drm/drm_drv.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index da0c836fe8e1..f6159acb8856 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -291,8 +291,9 @@ struct drm_driver { /** * @gem_create_object: constructor for gem objects * - * Hook for allocating the GEM object struct, for use by the CMA and - * SHMEM GEM helpers. + * Hook for allocating the GEM object struct, for use by the CMA + * and SHMEM GEM helpers. Returns a GEM object on success, or an + * ERR_PTR()-encoded error code otherwise. */ struct drm_gem_object *(*gem_create_object)(struct drm_device *dev, size_t size); -- cgit v1.2.3 From 213f5f8f31f10aa1e83187ae20fb7fa4e626b724 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Dec 2021 18:26:35 -0800 Subject: ipv4: convert fib_num_tclassid_users to atomic_t Before commit faa041a40b9f ("ipv4: Create cleanup helper for fib_nh") changes to net->ipv4.fib_num_tclassid_users were protected by RTNL. After the change, this is no longer the case, as free_fib_info_rcu() runs after rcu grace period, without rtnl being held. Fixes: faa041a40b9f ("ipv4: Create cleanup helper for fib_nh") Signed-off-by: Eric Dumazet Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- include/net/netns/ipv4.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ab5348e57db1..3417ba2d27ad 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -438,7 +438,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { - return net->ipv4.fib_num_tclassid_users; + return atomic_read(&net->ipv4.fib_num_tclassid_users); } #else static inline int fib_num_tclassid_users(struct net *net) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2f65701a43c9..6c5b2efc4f17 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -65,7 +65,7 @@ struct netns_ipv4 { bool fib_has_custom_local_routes; bool fib_offload_disabled; #ifdef CONFIG_IP_ROUTE_CLASSID - int fib_num_tclassid_users; + atomic_t fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; struct sock *fibnl; -- cgit v1.2.3 From 72f6a45202f20f0e1a46b0acb7803369cc53d0b8 Mon Sep 17 00:00:00 2001 From: Xiayu Zhang Date: Wed, 1 Dec 2021 10:57:13 +0800 Subject: Fix Comment of ETH_P_802_3_MIN The description of ETH_P_802_3_MIN is misleading. The value of EthernetType in Ethernet II frame is more than 0x0600, the value of Length in 802.3 frame is less than 0x0600. Signed-off-by: Xiayu Zhang Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 5da4ee234e0b..c0c2f3ed5729 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -117,7 +117,7 @@ #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ -#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value +#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is more than this value * then the frame is Ethernet II. Else it is 802.3 */ /* -- cgit v1.2.3 From e7f2be115f0746b969c0df14c0d182f65f005ca5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Oct 2021 16:10:55 +0200 Subject: sched/cputime: Fix getrusage(RUSAGE_THREAD) with nohz_full getrusage(RUSAGE_THREAD) with nohz_full may return shorter utime/stime than the actual time. task_cputime_adjusted() snapshots utime and stime and then adjust their sum to match the scheduler maintained cputime.sum_exec_runtime. Unfortunately in nohz_full, sum_exec_runtime is only updated once per second in the worst case, causing a discrepancy against utime and stime that can be updated anytime by the reader using vtime. To fix this situation, perform an update of cputime.sum_exec_runtime when the cputime snapshot reports the task as actually running while the tick is disabled. The related overhead is then contained within the relevant situations. Reported-by: Hasegawa Hitomi Signed-off-by: Frederic Weisbecker Signed-off-by: Hasegawa Hitomi Signed-off-by: Thomas Gleixner Tested-by: Masayoshi Mizuma Acked-by: Phil Auld Link: https://lore.kernel.org/r/20211026141055.57358-3-frederic@kernel.org --- include/linux/sched/cputime.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 6c9f19a33865..ce3c58286062 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -18,15 +18,16 @@ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, +extern bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime); extern u64 task_gtime(struct task_struct *t); #else -static inline void task_cputime(struct task_struct *t, +static inline bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { *utime = t->utime; *stime = t->stime; + return false; } static inline u64 task_gtime(struct task_struct *t) -- cgit v1.2.3 From f83baa0cb6cfc92ebaf7f9d3a99d7e34f2e77a8a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Dec 2021 19:35:01 +0100 Subject: HID: add hid_is_usb() function to make it simpler for USB detection A number of HID drivers already call hid_is_using_ll_driver() but only for the detection of if this is a USB device or not. Make this more obvious by creating hid_is_usb() and calling the function that way. Also converts the existing hid_is_using_ll_driver() functions to use the new call. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Cc: stable@vger.kernel.org Tested-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211201183503.2373082-1-gregkh@linuxfoundation.org --- include/linux/hid.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 9e067f937dbc..f453be385bd4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -840,6 +840,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, return hdev->ll_driver == driver; } +static inline bool hid_is_usb(struct hid_device *hdev) +{ + return hid_is_using_ll_driver(hdev, &usb_hid_driver); +} + #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 -- cgit v1.2.3 From 9e3562080950b6e3fe38a5e34ddd5b1c618f2019 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 2 Dec 2021 10:53:33 +0100 Subject: HID: add suspend/resume helpers There is a lot of duplication of code in the HID low level drivers. Better have everything in one place so we can eventually extend it in a generic way. Signed-off-by: Benjamin Tissoires Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211202095334.14399-4-benjamin.tissoires@redhat.com --- include/linux/hid.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 9e067f937dbc..ebe3ec98db6b 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -923,6 +923,16 @@ s32 hid_snto32(__u32 value, unsigned n); __u32 hid_field_extract(const struct hid_device *hid, __u8 *report, unsigned offset, unsigned n); +#ifdef CONFIG_PM +int hid_driver_suspend(struct hid_device *hdev, pm_message_t state); +int hid_driver_reset_resume(struct hid_device *hdev); +int hid_driver_resume(struct hid_device *hdev); +#else +static inline int hid_driver_suspend(struct hid_device *hdev, pm_message_t state) { return 0; } +static inline int hid_driver_reset_resume(struct hid_device *hdev) { return 0; } +static inline int hid_driver_resume(struct hid_device *hdev) { return 0; } +#endif + /** * hid_device_io_start - enable HID input during probe, remove * -- cgit v1.2.3 From f65a0b1f3e79444bba9ac56435eeb32db85ab2c9 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 2 Dec 2021 10:53:34 +0100 Subject: HID: do not inline some hid_hw_ functions We don't gain much by having them as inline, and it actually prevents us to attach a probe to those helpers. Signed-off-by: Benjamin Tissoires Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211202095334.14399-5-benjamin.tissoires@redhat.com --- include/linux/hid.h | 68 +++++------------------------------------------------ 1 file changed, 6 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index ebe3ec98db6b..b2fea7fc54a1 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1066,6 +1066,12 @@ int __must_check hid_hw_start(struct hid_device *hdev, void hid_hw_stop(struct hid_device *hdev); int __must_check hid_hw_open(struct hid_device *hdev); void hid_hw_close(struct hid_device *hdev); +void hid_hw_request(struct hid_device *hdev, + struct hid_report *report, int reqtype); +int hid_hw_raw_request(struct hid_device *hdev, + unsigned char reportnum, __u8 *buf, + size_t len, unsigned char rtype, int reqtype); +int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len); /** * hid_hw_power - requests underlying HW to go into given power mode @@ -1083,68 +1089,6 @@ static inline int hid_hw_power(struct hid_device *hdev, int level) } -/** - * hid_hw_request - send report request to device - * - * @hdev: hid device - * @report: report to send - * @reqtype: hid request type - */ -static inline void hid_hw_request(struct hid_device *hdev, - struct hid_report *report, int reqtype) -{ - if (hdev->ll_driver->request) - return hdev->ll_driver->request(hdev, report, reqtype); - - __hid_request(hdev, report, reqtype); -} - -/** - * hid_hw_raw_request - send report request to device - * - * @hdev: hid device - * @reportnum: report ID - * @buf: in/out data to transfer - * @len: length of buf - * @rtype: HID report type - * @reqtype: HID_REQ_GET_REPORT or HID_REQ_SET_REPORT - * - * Return: count of data transferred, negative if error - * - * Same behavior as hid_hw_request, but with raw buffers instead. - */ -static inline int hid_hw_raw_request(struct hid_device *hdev, - unsigned char reportnum, __u8 *buf, - size_t len, unsigned char rtype, int reqtype) -{ - if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf) - return -EINVAL; - - return hdev->ll_driver->raw_request(hdev, reportnum, buf, len, - rtype, reqtype); -} - -/** - * hid_hw_output_report - send output report to device - * - * @hdev: hid device - * @buf: raw data to transfer - * @len: length of buf - * - * Return: count of data transferred, negative if error - */ -static inline int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, - size_t len) -{ - if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf) - return -EINVAL; - - if (hdev->ll_driver->output_report) - return hdev->ll_driver->output_report(hdev, buf, len); - - return -ENOSYS; -} - /** * hid_hw_idle - send idle request to device * -- cgit v1.2.3 From 8293eb995f349aed28006792cad4cb48091919dd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 1 Dec 2021 10:10:25 -0800 Subject: bpf: Rename btf_member accessors. Rename btf_member_bit_offset() and btf_member_bitfield_size() to avoid conflicts with similarly named helpers in libbpf's btf.h. Rename the kernel helpers, since libbpf helpers are part of uapi. Suggested-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211201181040.23337-3-alexei.starovoitov@gmail.com --- include/linux/btf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 203eef993d76..956f70388f69 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -194,15 +194,15 @@ static inline bool btf_type_kflag(const struct btf_type *t) return BTF_INFO_KFLAG(t->info); } -static inline u32 btf_member_bit_offset(const struct btf_type *struct_type, - const struct btf_member *member) +static inline u32 __btf_member_bit_offset(const struct btf_type *struct_type, + const struct btf_member *member) { return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset) : member->offset; } -static inline u32 btf_member_bitfield_size(const struct btf_type *struct_type, - const struct btf_member *member) +static inline u32 __btf_member_bitfield_size(const struct btf_type *struct_type, + const struct btf_member *member) { return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset) : 0; -- cgit v1.2.3 From 29db4bea1d10b73749d7992c1fc9ac13499e8871 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 1 Dec 2021 10:10:26 -0800 Subject: bpf: Prepare relo_core.c for kernel duty. Make relo_core.c to be compiled for the kernel and for user space libbpf. Note the patch is reducing BPF_CORE_SPEC_MAX_LEN from 64 to 32. This is the maximum number of nested structs and arrays. For example: struct sample { int a; struct { int b[10]; }; }; struct sample *s = ...; int *y = &s->b[5]; This field access is encoded as "0:1:0:5" and spec len is 4. The follow up patch might bump it back to 64. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211201181040.23337-4-alexei.starovoitov@gmail.com --- include/linux/btf.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 956f70388f69..acef6ef28768 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -144,6 +144,53 @@ static inline bool btf_type_is_enum(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; } +static inline bool str_is_empty(const char *s) +{ + return !s || !s[0]; +} + +static inline u16 btf_kind(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info); +} + +static inline bool btf_is_enum(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM; +} + +static inline bool btf_is_composite(const struct btf_type *t) +{ + u16 kind = btf_kind(t); + + return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; +} + +static inline bool btf_is_array(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ARRAY; +} + +static inline bool btf_is_int(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_INT; +} + +static inline bool btf_is_ptr(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_PTR; +} + +static inline u8 btf_int_offset(const struct btf_type *t) +{ + return BTF_INT_OFFSET(*(u32 *)(t + 1)); +} + +static inline u8 btf_int_encoding(const struct btf_type *t) +{ + return BTF_INT_ENCODING(*(u32 *)(t + 1)); +} + static inline bool btf_type_is_scalar(const struct btf_type *t) { return btf_type_is_int(t) || btf_type_is_enum(t); @@ -184,6 +231,11 @@ static inline u16 btf_type_vlen(const struct btf_type *t) return BTF_INFO_VLEN(t->info); } +static inline u16 btf_vlen(const struct btf_type *t) +{ + return btf_type_vlen(t); +} + static inline u16 btf_func_linkage(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); @@ -208,11 +260,40 @@ static inline u32 __btf_member_bitfield_size(const struct btf_type *struct_type, : 0; } +static inline struct btf_member *btf_members(const struct btf_type *t) +{ + return (struct btf_member *)(t + 1); +} + +static inline u32 btf_member_bit_offset(const struct btf_type *t, u32 member_idx) +{ + const struct btf_member *m = btf_members(t) + member_idx; + + return __btf_member_bit_offset(t, m); +} + +static inline u32 btf_member_bitfield_size(const struct btf_type *t, u32 member_idx) +{ + const struct btf_member *m = btf_members(t) + member_idx; + + return __btf_member_bitfield_size(t, m); +} + static inline const struct btf_member *btf_type_member(const struct btf_type *t) { return (const struct btf_member *)(t + 1); } +static inline struct btf_array *btf_array(const struct btf_type *t) +{ + return (struct btf_array *)(t + 1); +} + +static inline struct btf_enum *btf_enum(const struct btf_type *t) +{ + return (struct btf_enum *)(t + 1); +} + static inline const struct btf_var_secinfo *btf_type_var_secinfo( const struct btf_type *t) { -- cgit v1.2.3 From 46334a0cd21bed70d6f1ddef1464f75a0ebe1774 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 1 Dec 2021 10:10:27 -0800 Subject: bpf: Define enum bpf_core_relo_kind as uapi. enum bpf_core_relo_kind is generated by llvm and processed by libbpf. It's a de-facto uapi. With CO-RE in the kernel the bpf_core_relo_kind values become uapi de-jure. Also rename them with BPF_CORE_ prefix to distinguish from conflicting names in bpf_core_read.h. The enums bpf_field_info_kind, bpf_type_id_kind, bpf_type_info_kind, bpf_enum_value_kind are passing different values from bpf program into llvm. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211201181040.23337-5-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 211b43afd0fb..9e66b1880020 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6374,4 +6374,23 @@ enum { BTF_F_ZERO = (1ULL << 3), }; +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. It is emitted by llvm and passed to + * libbpf and later to the kernel. + */ +enum bpf_core_relo_kind { + BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ + BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From fbd94c7afcf99c9f3b1ba1168657ecc428eb2c8d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 1 Dec 2021 10:10:28 -0800 Subject: bpf: Pass a set of bpf_core_relo-s to prog_load command. struct bpf_core_relo is generated by llvm and processed by libbpf. It's a de-facto uapi. With CO-RE in the kernel the struct bpf_core_relo becomes uapi de-jure. Add an ability to pass a set of 'struct bpf_core_relo' to prog_load command and let the kernel perform CO-RE relocations. Note the struct bpf_line_info and struct bpf_func_info have the same layout when passed from LLVM to libbpf and from libbpf to the kernel except "insn_off" fields means "byte offset" when LLVM generates it. Then libbpf converts it to "insn index" to pass to the kernel. The struct bpf_core_relo's "insn_off" field is always "byte offset". Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211201181040.23337-6-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 8 +++++++ include/uapi/linux/bpf.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cad0829710be..8bbf08fbab66 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1732,6 +1732,14 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, const struct bpf_insn *insn); +struct bpf_core_ctx { + struct bpf_verifier_log *log; + const struct btf *btf; +}; + +int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, + int relo_idx, void *insn); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9e66b1880020..c26871263f1f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1342,8 +1342,10 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; - __u32 :32; /* pad */ + __u32 core_relo_cnt; /* number of bpf_core_relo */ __aligned_u64 fd_array; /* array of FDs */ + __aligned_u64 core_relos; + __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -6393,4 +6395,59 @@ enum bpf_core_relo_kind { BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ }; +/* + * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf + * and from libbpf to the kernel. + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. String + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * - kind - one of enum bpf_core_relo_kind; + * + * Example: + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int *x = &s->a; // encoded as "0:0" (a is field #0) + * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_core_relo_kind kind; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From d9847eb8be3d895b2b5f514fdf3885d47a0b92a2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 22 Nov 2021 20:17:40 +0530 Subject: bpf: Make CONFIG_DEBUG_INFO_BTF depend upon CONFIG_BPF_SYSCALL Vinicius Costa Gomes reported [0] that build fails when CONFIG_DEBUG_INFO_BTF is enabled and CONFIG_BPF_SYSCALL is disabled. This leads to btf.c not being compiled, and then no symbol being present in vmlinux for the declarations in btf.h. Since BTF is not useful without enabling BPF subsystem, disallow this combination. However, theoretically disabling both now could still fail, as the symbol for kfunc_btf_id_list variables is not available. This isn't a problem as the compiler usually optimizes the whole register/unregister call, but at lower optimization levels it can fail the build in linking stage. Fix that by adding dummy variables so that modules taking address of them still work, but the whole thing is a noop. [0]: https://lore.kernel.org/bpf/20211110205418.332403-1-vinicius.gomes@intel.com Fixes: 14f267d95fe4 ("bpf: btf: Introduce helpers for dynamic BTF set registration") Reported-by: Vinicius Costa Gomes Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211122144742.477787-2-memxor@gmail.com --- include/linux/btf.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 203eef993d76..0e1b6281fd8f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -245,7 +245,10 @@ struct kfunc_btf_id_set { struct module *owner; }; -struct kfunc_btf_id_list; +struct kfunc_btf_id_list { + struct list_head list; + struct mutex mutex; +}; #ifdef CONFIG_DEBUG_INFO_BTF_MODULES void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, @@ -254,6 +257,9 @@ void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, struct kfunc_btf_id_set *s); bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, struct module *owner); + +extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; +extern struct kfunc_btf_id_list prog_test_kfunc_list; #else static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, struct kfunc_btf_id_set *s) @@ -268,13 +274,13 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, { return false; } + +static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused; +static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused; #endif #define DEFINE_KFUNC_BTF_ID_SET(set, name) \ struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ THIS_MODULE } -extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; -extern struct kfunc_btf_id_list prog_test_kfunc_list; - #endif -- cgit v1.2.3 From b247f32aecad09e6cf7edff7739e6f2c9dc5fca9 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 28 Oct 2021 16:03:06 +0300 Subject: net/mlx5: Dynamically resize flow counters query buffer The flow counters bulk query buffer is allocated once during mlx5_fc_init_stats(). For PFs and VFs this buffer usually takes a little more than 512KB of memory, which is aligned to the next power of 2, to 1MB. For SFs, this buffer is reduced and takes around 128 Bytes. The buffer size determines the maximum number of flow counters that can be queried at a time. Thus, having a bigger buffer can improve performance for users that need to query many flow counters. There are cases that don't use many flow counters and don't need a big buffer (e.g. SFs, VFs). Since this size is critical with large scale, in these cases the buffer size should be reduced. In order to reduce memory consumption while maintaining query performance, change the query buffer's allocation scheme to the following: - First allocate the buffer with small initial size. - If the number of counters surpasses the initial size, resize the buffer to the maximum size. The buffer only grows and isn't shrank, because users with many flow counters don't care about the buffer size and we don't want to add resize overhead if the current number of counters drops. This solution is preferable to the current one, which is less accurate and only addresses SFs. Signed-off-by: Avihai Horon Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a623ec635947..78655d8d13a7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -478,6 +478,10 @@ struct mlx5_fc_stats { unsigned long next_query; unsigned long sampling_interval; /* jiffies */ u32 *bulk_query_out; + int bulk_query_len; + size_t num_counters; + bool bulk_query_alloc_failed; + unsigned long next_bulk_query_alloc; struct mlx5_fc_pool fc_pool; }; -- cgit v1.2.3 From e2748ad5257754a47376e28c0f9dda4f5c1e5ca3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 2 Nov 2021 16:02:00 -0600 Subject: mtd: remove unused header file Commit d24dbd7541ff ("mtd: maps: Get rid of the latch-addr-flash driver") removed the last user of but left the header file behind. Nothing uses this file, delete it now. Cc: Boris Brezillon Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: linux-mtd@lists.infradead.org Signed-off-by: Jonathan Corbet Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211102220203.940290-7-corbet@lwn.net --- include/linux/mtd/latch-addr-flash.h | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 include/linux/mtd/latch-addr-flash.h (limited to 'include') diff --git a/include/linux/mtd/latch-addr-flash.h b/include/linux/mtd/latch-addr-flash.h deleted file mode 100644 index e94b8e128074..000000000000 --- a/include/linux/mtd/latch-addr-flash.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Interface for NOR flash driver whose high address lines are latched - * - * Copyright © 2008 MontaVista Software, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ -#ifndef __LATCH_ADDR_FLASH__ -#define __LATCH_ADDR_FLASH__ - -struct map_info; -struct mtd_partition; - -struct latch_addr_flash_data { - unsigned int width; - unsigned int size; - - int (*init)(void *data, int cs); - void (*done)(void *data); - void (*set_window)(unsigned long offset, void *data); - void *data; - - unsigned int nr_parts; - struct mtd_partition *parts; -}; - -#endif -- cgit v1.2.3 From 03cfda4fa6ea9bea2f30160579a78c2b8c1e616e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Dec 2021 15:37:24 -0800 Subject: tcp: fix another uninit-value (sk_rx_queue_mapping) KMSAN is still not happy [1]. I missed that passive connections do not inherit their sk_rx_queue_mapping values from the request socket, but instead tcp_child_process() is calling sk_mark_napi_id(child, skb) We have many sk_mark_napi_id() callers, so I am providing a new helper, forcing the setting sk_rx_queue_mapping and sk_napi_id. Note that we had no KMSAN report for sk_napi_id because passive connections got a copy of this field from the listener. sk_rx_queue_mapping in the other hand is inside the sk_dontcopy_begin/sk_dontcopy_end so sk_clone_lock() leaves this field uninitialized. We might remove dead code populating req->sk_rx_queue_mapping in the future. [1] BUG: KMSAN: uninit-value in __sk_rx_queue_set include/net/sock.h:1924 [inline] BUG: KMSAN: uninit-value in sk_rx_queue_update include/net/sock.h:1938 [inline] BUG: KMSAN: uninit-value in sk_mark_napi_id include/net/busy_poll.h:136 [inline] BUG: KMSAN: uninit-value in tcp_child_process+0xb42/0x1050 net/ipv4/tcp_minisocks.c:833 __sk_rx_queue_set include/net/sock.h:1924 [inline] sk_rx_queue_update include/net/sock.h:1938 [inline] sk_mark_napi_id include/net/busy_poll.h:136 [inline] tcp_child_process+0xb42/0x1050 net/ipv4/tcp_minisocks.c:833 tcp_v4_rcv+0x3d83/0x4ed0 net/ipv4/tcp_ipv4.c:2066 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 run_ksoftirqd+0x33/0x50 kernel/softirq.c:920 smpboot_thread_fn+0x616/0xbf0 kernel/smpboot.c:164 kthread+0x721/0x850 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 Uninit was created at: __alloc_pages+0xbc7/0x10a0 mm/page_alloc.c:5409 alloc_pages+0x8a5/0xb80 alloc_slab_page mm/slub.c:1810 [inline] allocate_slab+0x287/0x1c20 mm/slub.c:1947 new_slab mm/slub.c:2010 [inline] ___slab_alloc+0xbdf/0x1e90 mm/slub.c:3039 __slab_alloc mm/slub.c:3126 [inline] slab_alloc_node mm/slub.c:3217 [inline] slab_alloc mm/slub.c:3259 [inline] kmem_cache_alloc+0xbb3/0x11c0 mm/slub.c:3264 sk_prot_alloc+0xeb/0x570 net/core/sock.c:1914 sk_clone_lock+0xd6/0x1940 net/core/sock.c:2118 inet_csk_clone_lock+0x8d/0x6a0 net/ipv4/inet_connection_sock.c:956 tcp_create_openreq_child+0xb1/0x1ef0 net/ipv4/tcp_minisocks.c:453 tcp_v4_syn_recv_sock+0x268/0x2710 net/ipv4/tcp_ipv4.c:1563 tcp_check_req+0x207c/0x2a30 net/ipv4/tcp_minisocks.c:765 tcp_v4_rcv+0x36f5/0x4ed0 net/ipv4/tcp_ipv4.c:2047 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 Fixes: 342159ee394d ("net: avoid dirtying sk->sk_rx_queue_mapping") Fixes: a37a0ee4d25c ("net: avoid uninit-value from tcp_conn_request") Signed-off-by: Eric Dumazet Reported-by: syzbot Tested-by: Alexander Potapenko Signed-off-by: David S. Miller --- include/net/busy_poll.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 7994455ec714..c4898fcbf923 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -136,6 +136,19 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) sk_rx_queue_update(sk, skb); } +/* Variant of sk_mark_napi_id() for passive flow setup, + * as sk->sk_napi_id and sk->sk_rx_queue_mapping content + * needs to be set. + */ +static inline void sk_mark_napi_id_set(struct sock *sk, + const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); +#endif + sk_rx_queue_set(sk, skb); +} + static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id) { #ifdef CONFIG_NET_RX_BUSY_POLL -- cgit v1.2.3 From dac8e00fb640e9569cdeefd3ce8a75639e5d0711 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Dec 2021 18:27:18 -0800 Subject: bonding: make tx_rebalance_counter an atomic KCSAN reported a data-race [1] around tx_rebalance_counter which can be accessed from different contexts, without the protection of a lock/mutex. [1] BUG: KCSAN: data-race in bond_alb_init_slave / bond_alb_monitor write to 0xffff888157e8ca24 of 4 bytes by task 7075 on cpu 0: bond_alb_init_slave+0x713/0x860 drivers/net/bonding/bond_alb.c:1613 bond_enslave+0xd94/0x3010 drivers/net/bonding/bond_main.c:1949 do_set_master net/core/rtnetlink.c:2521 [inline] __rtnl_newlink net/core/rtnetlink.c:3475 [inline] rtnl_newlink+0x1298/0x13b0 net/core/rtnetlink.c:3506 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2491 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x6e1/0x7d0 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmsg+0x195/0x230 net/socket.c:2492 __do_sys_sendmsg net/socket.c:2501 [inline] __se_sys_sendmsg net/socket.c:2499 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff888157e8ca24 of 4 bytes by task 1082 on cpu 1: bond_alb_monitor+0x8f/0xc00 drivers/net/bonding/bond_alb.c:1511 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 worker_thread+0x616/0xa70 kernel/workqueue.c:2445 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x00000001 -> 0x00000064 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 1082 Comm: kworker/u4:3 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: bond1 bond_alb_monitor Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/bond_alb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index f6af76c87a6c..191c36afa1f4 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -126,7 +126,7 @@ struct tlb_slave_info { struct alb_bond_info { struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ u32 unbalanced_load; - int tx_rebalance_counter; + atomic_t tx_rebalance_counter; int lp_counter; /* -------- rlb parameters -------- */ int rlb_enabled; -- cgit v1.2.3 From 2c4dcd7fd57b20a21b65da04d89c38a7217d79cf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Nov 2021 14:03:07 +0100 Subject: topology/sysfs: export die attributes only if an architectures has support The die_id and die_cpus topology sysfs attributes have been added with commit 0e344d8c709f ("cpu/topology: Export die_id") and commit 2e4c54dac7b3 ("topology: Create core_cpus and die_cpus sysfs attributes"). While they are currently only used and useful for x86 they are still present with bogus default values for all architectures. Instead of enforcing such new sysfs attributes to all architectures, make them only optional visible if an architecture opts in by defining both the topology_die_id and topology_die_cpumask attributes. This is similar to what was done when the book and drawer topology levels were introduced: avoid useless and therefore confusing sysfs attributes for architectures which cannot make use of them. This should not break any existing applications, since this is a rather new interface and applications should be able to handle also older kernel versions without such attributes - besides that they contain only useful information for x86. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/20211129130309.3256168-2-hca@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/topology.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/topology.h b/include/linux/topology.h index 0b3704ad13c8..8d1bdae76230 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -180,6 +180,10 @@ static inline int cpu_to_mem(int cpu) #endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */ +#if defined(topology_die_id) && defined(topology_die_cpumask) +#define TOPOLOGY_DIE_SYSFS +#endif + #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) #endif -- cgit v1.2.3 From e795707703b32fecdd7467afcc33ff1e92416c05 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Nov 2021 14:03:08 +0100 Subject: topology/sysfs: export cluster attributes only if an architectures has support The cluster_id and cluster_cpus topology sysfs attributes have been added with commit c5e22feffdd7 ("topology: Represent clusters of CPUs within a die"). They are currently only used for x86, arm64, and riscv (via generic arch topology), however they are still present with bogus default values for all other architectures. Instead of enforcing such new sysfs attributes to all architectures, make them only optional visible if an architecture opts in by defining both the topology_cluster_id and topology_cluster_cpumask attributes. This is similar to what was done when the book and drawer topology levels were introduced: avoid useless and therefore confusing sysfs attributes for architectures which cannot make use of them. This should not break any existing applications, since this is a new interface introduced with the v5.16 merge window. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/20211129130309.3256168-3-hca@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/topology.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/topology.h b/include/linux/topology.h index 8d1bdae76230..d52be69037db 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -183,6 +183,9 @@ static inline int cpu_to_mem(int cpu) #if defined(topology_die_id) && defined(topology_die_cpumask) #define TOPOLOGY_DIE_SYSFS #endif +#if defined(topology_cluster_id) && defined(topology_cluster_cpumask) +#define TOPOLOGY_CLUSTER_SYSFS +#endif #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) -- cgit v1.2.3 From f1045056c726440469d89d23c13734bcd6c0d15b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Nov 2021 14:03:09 +0100 Subject: topology/sysfs: rework book and drawer topology ifdefery Provide default defines for the topology_book_[id|cpumask] and topology_drawer_[id|cpumask] macros just like for each other topology level. This way all topology levels are handled in a similar way. Still the the book and drawer levels are only used on s390, and also the sysfs attributes are only created on s390. However other architectures may opt in if wanted. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/20211129130309.3256168-4-hca@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/topology.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/topology.h b/include/linux/topology.h index d52be69037db..a6e201758ae9 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -186,6 +186,12 @@ static inline int cpu_to_mem(int cpu) #if defined(topology_cluster_id) && defined(topology_cluster_cpumask) #define TOPOLOGY_CLUSTER_SYSFS #endif +#if defined(topology_book_id) && defined(topology_book_cpumask) +#define TOPOLOGY_BOOK_SYSFS +#endif +#if defined(topology_drawer_id) && defined(topology_drawer_cpumask) +#define TOPOLOGY_DRAWER_SYSFS +#endif #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) @@ -199,6 +205,12 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif +#ifndef topology_book_id +#define topology_book_id(cpu) ((void)(cpu), -1) +#endif +#ifndef topology_drawer_id +#define topology_drawer_id(cpu) ((void)(cpu), -1) +#endif #ifndef topology_sibling_cpumask #define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif @@ -211,6 +223,12 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_die_cpumask #define topology_die_cpumask(cpu) cpumask_of(cpu) #endif +#ifndef topology_book_cpumask +#define topology_book_cpumask(cpu) cpumask_of(cpu) +#endif +#ifndef topology_drawer_cpumask +#define topology_drawer_cpumask(cpu) cpumask_of(cpu) +#endif #if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask) static inline const struct cpumask *cpu_smt_mask(int cpu) -- cgit v1.2.3 From c7fdb2404f66131bc9c22e06f712717288826487 Mon Sep 17 00:00:00 2001 From: Abhyuday Godhasara Date: Sun, 28 Nov 2021 23:02:14 -0800 Subject: drivers: soc: xilinx: add xilinx event management driver Xilinx event management driver provides an interface to subscribe or unsubscribe for the event/callback supported by firmware. An agent can use this driver to register for Error Event, Device Event and Suspend callback. This driver only allows one agent per event to do registration. Driver will return an error in case of multiple registration for the same event. This driver gets notification from firmware through TF-A as SGI. During initialization, event manager driver register handler for SGI used for notification. It also provides SGI number info to TF-A by using IOCTL_REGISTER_SGI call to TF-A. After receiving notification from firmware, the driver makes an SMC call to TF-A to get IPI data. From the IPI data provided by TF-A, event manager identified the cause of event and forward that event/callback notification to the respective subscribed driver. After this, in case of Error Event, driver performs unregistration as firmware expecting from agent to do re-registration if the agent wants to get notified on the second occurrence of an error event. Add new IOCTL id IOCTL_REGISTER_SGI = 25 which is used to register SGI on TF-A. Older firmware doesn't have all required support for event handling which is required by the event manager driver. So add check for the register notifier version in the event manager driver. Xilinx event management driver provides support to subscribe for multiple error events with the use of Event Mask in a single call of xlnx_register_event(). Agent driver can provide 'Event' parameter value as ORed of multiple event masks to register single callback for multiple events. For example, to register callback for event=0x1 and event=0x2 for the given node, agent can provide event=0x3 (0x1 | 0x2). It is not possible to register multiple events for different nodes in a single registration call. Also provide support to receive multiple error events as in single notification from firmware and then forward it to subscribed drivers via registered callback one by one. Acked-by: Michal Simek Signed-off-by: Tejas Patel Signed-off-by: Rajan Vaja Signed-off-by: Abhyuday Godhasara Link: https://lore.kernel.org/r/20211129070216.30253-2-abhyuday.godhasara@xilinx.com Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/xlnx-event-manager.h | 36 +++++++++++++++++++++++++++++ include/linux/firmware/xlnx-zynqmp.h | 2 ++ 2 files changed, 38 insertions(+) create mode 100644 include/linux/firmware/xlnx-event-manager.h (limited to 'include') diff --git a/include/linux/firmware/xlnx-event-manager.h b/include/linux/firmware/xlnx-event-manager.h new file mode 100644 index 000000000000..3f87c4929d21 --- /dev/null +++ b/include/linux/firmware/xlnx-event-manager.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _FIRMWARE_XLNX_EVENT_MANAGER_H_ +#define _FIRMWARE_XLNX_EVENT_MANAGER_H_ + +#include + +#define CB_MAX_PAYLOAD_SIZE (4U) /*In payload maximum 32bytes */ + +/************************** Exported Function *****************************/ + +typedef void (*event_cb_func_t)(const u32 *payload, void *data); + +#if IS_REACHABLE(CONFIG_XLNX_EVENT_MANAGER) +int xlnx_register_event(const enum pm_api_cb_id cb_type, const u32 node_id, + const u32 event, const bool wake, + event_cb_func_t cb_fun, void *data); + +int xlnx_unregister_event(const enum pm_api_cb_id cb_type, const u32 node_id, + const u32 event, event_cb_func_t cb_fun); +#else +static inline int xlnx_register_event(const enum pm_api_cb_id cb_type, const u32 node_id, + const u32 event, const bool wake, + event_cb_func_t cb_fun, void *data) +{ + return -ENODEV; +} + +static inline int xlnx_unregister_event(const enum pm_api_cb_id cb_type, const u32 node_id, + const u32 event, event_cb_func_t cb_fun) +{ + return -ENODEV; +} +#endif + +#endif /* _FIRMWARE_XLNX_EVENT_MANAGER_H_ */ diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 077e894bb340..907cb01890cf 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -141,6 +141,8 @@ enum pm_ioctl_id { /* Set healthy bit value */ IOCTL_SET_BOOT_HEALTH_STATUS = 17, IOCTL_OSPI_MUX_SELECT = 21, + /* Register SGI to ATF */ + IOCTL_REGISTER_SGI = 25, }; enum pm_query_id { -- cgit v1.2.3 From 14866a7db8da1f61fb6135c461b733694eea9580 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 1 Dec 2021 20:43:03 -0800 Subject: Documentation/auxiliary_bus: Add example code for module_auxiliary_driver() Add an example code snipit to the module_auxiliary_driver() documentation which is consistent with the other example code in the elsewhere in the documentation. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20211202044305.4006853-6-ira.weiny@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index fc51d45f106b..605b27aab693 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -66,6 +66,10 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); * Helper macro for auxiliary drivers which do not do anything special in * module init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() + * + * .. code-block:: c + * + * module_auxiliary_driver(my_drv); */ #define module_auxiliary_driver(__auxiliary_driver) \ module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) -- cgit v1.2.3 From e1b5186810cc7d4ec60447032636b8e6772dbbc6 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 1 Dec 2021 20:43:05 -0800 Subject: Documentation/auxiliary_bus: Move the text into the code The code and documentation are more difficult to maintain when kept separately. This is further compounded when the standard structure documentation infrastructure is not used. Move the documentation into the code, use the standard documentation infrastructure, add current documented functions, and reference the text in the rst file. Suggested-by: Greg Kroah-Hartman Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20211202044305.4006853-8-ira.weiny@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 160 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 605b27aab693..e6d8b5c16226 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -11,12 +11,172 @@ #include #include +/** + * DOC: DEVICE_LIFESPAN + * + * The registering driver is the entity that allocates memory for the + * auxiliary_device and registers it on the auxiliary bus. It is important to + * note that, as opposed to the platform bus, the registering driver is wholly + * responsible for the management of the memory used for the device object. + * + * To be clear the memory for the auxiliary_device is freed in the release() + * callback defined by the registering driver. The registering driver should + * only call auxiliary_device_delete() and then auxiliary_device_uninit() when + * it is done with the device. The release() function is then automatically + * called if and when other code releases their reference to the devices. + * + * A parent object, defined in the shared header file, contains the + * auxiliary_device. It also contains a pointer to the shared object(s), which + * also is defined in the shared header. Both the parent object and the shared + * object(s) are allocated by the registering driver. This layout allows the + * auxiliary_driver's registering module to perform a container_of() call to go + * from the pointer to the auxiliary_device, that is passed during the call to + * the auxiliary_driver's probe function, up to the parent object, and then + * have access to the shared object(s). + * + * The memory for the shared object(s) must have a lifespan equal to, or + * greater than, the lifespan of the memory for the auxiliary_device. The + * auxiliary_driver should only consider that the shared object is valid as + * long as the auxiliary_device is still registered on the auxiliary bus. It + * is up to the registering driver to manage (e.g. free or keep available) the + * memory for the shared object beyond the life of the auxiliary_device. + * + * The registering driver must unregister all auxiliary devices before its own + * driver.remove() is completed. An easy way to ensure this is to use the + * devm_add_action_or_reset() call to register a function against the parent + * device which unregisters the auxiliary device object(s). + * + * Finally, any operations which operate on the auxiliary devices must continue + * to function (if only to return an error) after the registering driver + * unregisters the auxiliary device. + */ + +/** + * struct auxiliary_device - auxiliary device object. + * @dev: Device, + * The release and parent fields of the device structure must be filled + * in + * @name: Match name found by the auxiliary device driver, + * @id: unique identitier if multiple devices of the same name are exported, + * + * An auxiliary_device represents a part of its parent device's functionality. + * It is given a name that, combined with the registering drivers + * KBUILD_MODNAME, creates a match_name that is used for driver binding, and an + * id that combined with the match_name provide a unique name to register with + * the bus subsystem. For example, a driver registering an auxiliary device is + * named 'foo_mod.ko' and the subdevice is named 'foo_dev'. The match name is + * therefore 'foo_mod.foo_dev'. + * + * Registering an auxiliary_device is a three-step process. + * + * First, a 'struct auxiliary_device' needs to be defined or allocated for each + * sub-device desired. The name, id, dev.release, and dev.parent fields of + * this structure must be filled in as follows. + * + * The 'name' field is to be given a name that is recognized by the auxiliary + * driver. If two auxiliary_devices with the same match_name, eg + * "foo_mod.foo_dev", are registered onto the bus, they must have unique id + * values (e.g. "x" and "y") so that the registered devices names are + * "foo_mod.foo_dev.x" and "foo_mod.foo_dev.y". If match_name + id are not + * unique, then the device_add fails and generates an error message. + * + * The auxiliary_device.dev.type.release or auxiliary_device.dev.release must + * be populated with a non-NULL pointer to successfully register the + * auxiliary_device. This release call is where resources associated with the + * auxiliary device must be free'ed. Because once the device is placed on the + * bus the parent driver can not tell what other code may have a reference to + * this data. + * + * The auxiliary_device.dev.parent should be set. Typically to the registering + * drivers device. + * + * Second, call auxiliary_device_init(), which checks several aspects of the + * auxiliary_device struct and performs a device_initialize(). After this step + * completes, any error state must have a call to auxiliary_device_uninit() in + * its resolution path. + * + * The third and final step in registering an auxiliary_device is to perform a + * call to auxiliary_device_add(), which sets the name of the device and adds + * the device to the bus. + * + * .. code-block:: c + * + * #define MY_DEVICE_NAME "foo_dev" + * + * ... + * + * struct auxiliary_device *my_aux_dev = my_aux_dev_alloc(xxx); + * + * // Step 1: + * my_aux_dev->name = MY_DEVICE_NAME; + * my_aux_dev->id = my_unique_id_alloc(xxx); + * my_aux_dev->dev.release = my_aux_dev_release; + * my_aux_dev->dev.parent = my_dev; + * + * // Step 2: + * if (auxiliary_device_init(my_aux_dev)) + * goto fail; + * + * // Step 3: + * if (auxiliary_device_add(my_aux_dev)) { + * auxiliary_device_uninit(my_aux_dev); + * goto fail; + * } + * + * ... + * + * + * Unregistering an auxiliary_device is a two-step process to mirror the + * register process. First call auxiliary_device_delete(), then call + * auxiliary_device_uninit(). + * + * .. code-block:: c + * + * auxiliary_device_delete(my_dev->my_aux_dev); + * auxiliary_device_uninit(my_dev->my_aux_dev); + */ struct auxiliary_device { struct device dev; const char *name; u32 id; }; +/** + * struct auxiliary_driver - Definition of an auxiliary bus driver + * @probe: Called when a matching device is added to the bus. + * @remove: Called when device is removed from the bus. + * @shutdown: Called at shut-down time to quiesce the device. + * @suspend: Called to put the device to sleep mode. Usually to a power state. + * @resume: Called to bring a device from sleep mode. + * @name: Driver name. + * @driver: Core driver structure. + * @id_table: Table of devices this driver should match on the bus. + * + * Auxiliary drivers follow the standard driver model convention, where + * discovery/enumeration is handled by the core, and drivers provide probe() + * and remove() methods. They support power management and shutdown + * notifications using the standard conventions. + * + * Auxiliary drivers register themselves with the bus by calling + * auxiliary_driver_register(). The id_table contains the match_names of + * auxiliary devices that a driver can bind with. + * + * .. code-block:: c + * + * static const struct auxiliary_device_id my_auxiliary_id_table[] = { + * { .name = "foo_mod.foo_dev" }, + * {}, + * }; + * + * MODULE_DEVICE_TABLE(auxiliary, my_auxiliary_id_table); + * + * struct auxiliary_driver my_drv = { + * .name = "myauxiliarydrv", + * .id_table = my_auxiliary_id_table, + * .probe = my_drv_probe, + * .remove = my_drv_remove + * }; + */ struct auxiliary_driver { int (*probe)(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id); void (*remove)(struct auxiliary_device *auxdev); -- cgit v1.2.3 From bb49e9e730c2906a958eee273a7819f401543d6c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 3 Dec 2021 12:16:58 +0100 Subject: fs: add is_idmapped_mnt() helper Multiple places open-code the same check to determine whether a given mount is idmapped. Introduce a simple helper function that can be used instead. This allows us to get rid of the fragile open-coding. We will later change the check that is used to determine whether a given mount is idmapped. Introducing a helper allows us to do this in a single place instead of doing it for multiple places. Link: https://lore.kernel.org/r/20211123114227.3124056-2-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-2-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-2-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner --- include/linux/fs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bbf812ce89a8..06cbefd76de7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2724,6 +2724,20 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) { return mnt_user_ns(file->f_path.mnt); } + +/** + * is_idmapped_mnt - check whether a mount is mapped + * @mnt: the mount to check + * + * If @mnt has an idmapping attached to it @mnt is mapped. + * + * Return: true if mount is mapped, false if not. + */ +static inline bool is_idmapped_mnt(const struct vfsmount *mnt) +{ + return mnt_user_ns(mnt) != &init_user_ns; +} + extern long vfs_truncate(const struct path *, loff_t); int do_truncate(struct user_namespace *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); -- cgit v1.2.3 From a793d79ea3e041081cd7cbd8ee43d0b5e4914a2b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 3 Dec 2021 12:16:59 +0100 Subject: fs: move mapping helpers The low-level mapping helpers were so far crammed into fs.h. They are out of place there. The fs.h header should just contain the higher-level mapping helpers that interact directly with vfs objects such as struct super_block or struct inode and not the bare mapping helpers. Similarly, only vfs and specific fs code shall interact with low-level mapping helpers. And so they won't be made accessible automatically through regular {g,u}id helpers. Link: https://lore.kernel.org/r/20211123114227.3124056-3-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-3-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-3-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner --- include/linux/fs.h | 91 +------------------------------------ include/linux/mnt_idmapping.h | 101 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 90 deletions(-) create mode 100644 include/linux/mnt_idmapping.h (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 06cbefd76de7..b3bcb2129699 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -1624,34 +1625,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); } -/** - * kuid_into_mnt - map a kuid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return make_kuid(mnt_userns, __kuid_val(kuid)); -} - -/** - * kgid_into_mnt - map a kgid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return make_kgid(mnt_userns, __kgid_val(kgid)); -} - /** * i_uid_into_mnt - map an inode's i_uid down into a mnt_userns * @mnt_userns: user namespace of the mount the inode was found from @@ -1680,68 +1653,6 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, return kgid_into_mnt(mnt_userns, inode->i_gid); } -/** - * kuid_from_mnt - map a kuid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped up according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return KUIDT_INIT(from_kuid(mnt_userns, kuid)); -} - -/** - * kgid_from_mnt - map a kgid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped up according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return KGIDT_INIT(from_kgid(mnt_userns, kgid)); -} - -/** - * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * - * Use this helper to initialize a new vfs or filesystem object based on - * the caller's fsuid. A common example is initializing the i_uid field of - * a newly allocated inode triggered by a creation event such as mkdir or - * O_CREAT. Other examples include the allocation of quotas for a specific - * user. - * - * Return: the caller's current fsuid mapped up according to @mnt_userns. - */ -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) -{ - return kuid_from_mnt(mnt_userns, current_fsuid()); -} - -/** - * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * - * Use this helper to initialize a new vfs or filesystem object based on - * the caller's fsgid. A common example is initializing the i_gid field of - * a newly allocated inode triggered by a creation event such as mkdir or - * O_CREAT. Other examples include the allocation of quotas for a specific - * user. - * - * Return: the caller's current fsgid mapped up according to @mnt_userns. - */ -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) -{ - return kgid_from_mnt(mnt_userns, current_fsgid()); -} - /** * inode_fsuid_set - initialize inode's i_uid field with callers fsuid * @inode: inode to initialize diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h new file mode 100644 index 000000000000..47c7811fadfe --- /dev/null +++ b/include/linux/mnt_idmapping.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MNT_IDMAPPING_H +#define _LINUX_MNT_IDMAPPING_H + +#include +#include + +struct user_namespace; +extern struct user_namespace init_user_ns; + +/** + * kuid_into_mnt - map a kuid down into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kuid: kuid to be mapped + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping INVALID_UID is returned. + */ +static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, + kuid_t kuid) +{ + return make_kuid(mnt_userns, __kuid_val(kuid)); +} + +/** + * kgid_into_mnt - map a kgid down into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kgid: kgid to be mapped + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping INVALID_GID is returned. + */ +static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, + kgid_t kgid) +{ + return make_kgid(mnt_userns, __kgid_val(kgid)); +} + +/** + * kuid_from_mnt - map a kuid up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kuid: kuid to be mapped + * + * Return: @kuid mapped up according to @mnt_userns. + * If @kuid has no mapping INVALID_UID is returned. + */ +static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, + kuid_t kuid) +{ + return KUIDT_INIT(from_kuid(mnt_userns, kuid)); +} + +/** + * kgid_from_mnt - map a kgid up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kgid: kgid to be mapped + * + * Return: @kgid mapped up according to @mnt_userns. + * If @kgid has no mapping INVALID_GID is returned. + */ +static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, + kgid_t kgid) +{ + return KGIDT_INIT(from_kgid(mnt_userns, kgid)); +} + +/** + * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsuid. A common example is initializing the i_uid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsuid mapped up according to @mnt_userns. + */ +static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) +{ + return kuid_from_mnt(mnt_userns, current_fsuid()); +} + +/** + * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsgid. A common example is initializing the i_gid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsgid mapped up according to @mnt_userns. + */ +static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) +{ + return kgid_from_mnt(mnt_userns, current_fsgid()); +} + +#endif /* _LINUX_MNT_IDMAPPING_H */ -- cgit v1.2.3 From 476860b3eb4a50958243158861d5340066df5af2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 3 Dec 2021 12:17:00 +0100 Subject: fs: tweak fsuidgid_has_mapping() If the caller's fs{g,u}id aren't mapped in the mount's idmapping we can return early and skip the check whether the mapped fs{g,u}id also have a mapping in the filesystem's idmapping. If the fs{g,u}id aren't mapped in the mount's idmapping they consequently can't be mapped in the filesystem's idmapping. So there's no point in checking that. Link: https://lore.kernel.org/r/20211123114227.3124056-4-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-4-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-4-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner --- include/linux/fs.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b3bcb2129699..db5ee15e36b1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1695,10 +1695,18 @@ static inline void inode_fsgid_set(struct inode *inode, static inline bool fsuidgid_has_mapping(struct super_block *sb, struct user_namespace *mnt_userns) { - struct user_namespace *s_user_ns = sb->s_user_ns; + struct user_namespace *fs_userns = sb->s_user_ns; + kuid_t kuid; + kgid_t kgid; - return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) && - kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns)); + kuid = mapped_fsuid(mnt_userns); + if (!uid_valid(kuid)) + return false; + kgid = mapped_fsgid(mnt_userns); + if (!gid_valid(kgid)) + return false; + return kuid_has_mapping(fs_userns, kuid) && + kgid_has_mapping(fs_userns, kgid); } extern struct timespec64 current_time(struct inode *inode); -- cgit v1.2.3 From 1ac2a4104968e0a60b4b3572216a92aab5c1b025 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 3 Dec 2021 12:17:01 +0100 Subject: fs: account for filesystem mappings Currently we only support idmapped mounts for filesystems mounted without an idmapping. This was a conscious decision mentioned in multiple places (cf. e.g. [1]). As explained at length in [3] it is perfectly fine to extend support for idmapped mounts to filesystem's mounted with an idmapping should the need arise. The need has been there for some time now. Various container projects in userspace need this to run unprivileged and nested unprivileged containers (cf. [2]). Before we can port any filesystem that is mountable with an idmapping to support idmapped mounts we need to first extend the mapping helpers to account for the filesystem's idmapping. This again, is explained at length in our documentation at [3] but I'll give an overview here again. Currently, the low-level mapping helpers implement the remapping algorithms described in [3] in a simplified manner. Because we could rely on the fact that all filesystems supporting idmapped mounts are mounted without an idmapping the translation step from or into the filesystem idmapping could be skipped. In order to support idmapped mounts of filesystem's mountable with an idmapping the translation step we were able to skip before cannot be skipped anymore. A filesystem mounted with an idmapping is very likely to not use an identity mapping and will instead use a non-identity mapping. So the translation step from or into the filesystem's idmapping in the remapping algorithm cannot be skipped for such filesystems. More details with examples can be found in [3]. This patch adds a few new and prepares some already existing low-level mapping helpers to perform the full translation algorithm explained in [3]. The low-level helpers can be written in a way that they only perform the additional translation step when the filesystem is indeed mounted with an idmapping. If the low-level helpers detect that they are not dealing with an idmapped mount they can simply return the relevant k{g,u}id unchanged; no remapping needs to be performed at all. The no_idmapping() helper detects whether the shortcut can be used. If the low-level helpers detected that they are dealing with an idmapped mount but the underlying filesystem is mounted without an idmapping we can rely on the previous shorcut and can continue to skip the translation step from or into the filesystem's idmapping. These checks guarantee that only the minimal amount of work is performed. As before, if idmapped mounts aren't used the low-level helpers are idempotent and no work is performed at all. This patch adds the helpers mapped_k{g,u}id_fs() and mapped_k{g,u}id_user(). Following patches will port all places to replace the old k{g,u}id_into_mnt() and k{g,u}id_from_mnt() with these two new helpers. After the conversion is done k{g,u}id_into_mnt() and k{g,u}id_from_mnt() will be removed. This also concludes the renaming of the mapping helpers we started in [4]. Now, all mapping helpers will started with the "mapped_" prefix making everything nice and consistent. The mapped_k{g,u}id_fs() helpers replace the k{g,u}id_into_mnt() helpers. They are to be used when k{g,u}ids are to be mapped from the vfs, e.g. from from struct inode's i_{g,u}id. Conversely, the mapped_k{g,u}id_user() helpers replace the k{g,u}id_from_mnt() helpers. They are to be used when k{g,u}ids are to be written to disk, e.g. when entering from a system call to change ownership of a file. This patch only introduces the helpers. It doesn't yet convert the relevant places to account for filesystem mounted with an idmapping. [1]: commit 2ca4dcc4909d ("fs/mount_setattr: tighten permission checks") [2]: https://github.com/containers/podman/issues/10374 [3]: Documentations/filesystems/idmappings.rst [4]: commit a65e58e791a1 ("fs: document and rename fsid helpers") Link: https://lore.kernel.org/r/20211123114227.3124056-5-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-5-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-5-brauner@kernel.org Cc: Seth Forshee Cc: Amir Goldstein Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 +- include/linux/mnt_idmapping.h | 193 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 191 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index db5ee15e36b1..57aee6ebba72 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1636,7 +1636,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return kuid_into_mnt(mnt_userns, inode->i_uid); + return mapped_kuid_fs(mnt_userns, &init_user_ns, inode->i_uid); } /** @@ -1650,7 +1650,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return kgid_into_mnt(mnt_userns, inode->i_gid); + return mapped_kgid_fs(mnt_userns, &init_user_ns, inode->i_gid); } /** diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 47c7811fadfe..60341cd33ccc 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -6,6 +6,11 @@ #include struct user_namespace; +/* + * Carries the initial idmapping of 0:0:4294967295 which is an identity + * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is + * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. + */ extern struct user_namespace init_user_ns; /** @@ -64,9 +69,189 @@ static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, return KGIDT_INIT(from_kgid(mnt_userns, kgid)); } +/** + * initial_idmapping - check whether this is the initial mapping + * @ns: idmapping to check + * + * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, + * [...], 1000 to 1000 [...]. + * + * Return: true if this is the initial mapping, false if not. + */ +static inline bool initial_idmapping(const struct user_namespace *ns) +{ + return ns == &init_user_ns; +} + +/** + * no_idmapping - check whether we can skip remapping a kuid/gid + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * + * This function can be used to check whether a remapping between two + * idmappings is required. + * An idmapped mount is a mount that has an idmapping attached to it that + * is different from the filsystem's idmapping and the initial idmapping. + * If the initial mapping is used or the idmapping of the mount and the + * filesystem are identical no remapping is required. + * + * Return: true if remapping can be skipped, false if not. + */ +static inline bool no_idmapping(const struct user_namespace *mnt_userns, + const struct user_namespace *fs_userns) +{ + return initial_idmapping(mnt_userns) || mnt_userns == fs_userns; +} + +/** + * mapped_kuid_fs - map a filesystem kuid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kuid : kuid to be mapped + * + * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this + * function when preparing a @kuid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kuid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kuid won't change when calling + * from_kuid() so we can simply retrieve the value via __kuid_val() + * directly. + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is + * returned. + */ +static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kuid; + if (initial_idmapping(fs_userns)) + uid = __kuid_val(kuid); + else + uid = from_kuid(fs_userns, kuid); + if (uid == (uid_t)-1) + return INVALID_UID; + return make_kuid(mnt_userns, uid); +} + +/** + * mapped_kgid_fs - map a filesystem kgid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kgid : kgid to be mapped + * + * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this + * function when preparing a @kgid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kgid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kgid won't change when calling + * from_kgid() so we can simply retrieve the value via __kgid_val() + * directly. + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is + * returned. + */ +static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + gid_t gid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kgid; + if (initial_idmapping(fs_userns)) + gid = __kgid_val(kgid); + else + gid = from_kgid(fs_userns, kgid); + if (gid == (gid_t)-1) + return INVALID_GID; + return make_kgid(mnt_userns, gid); +} + +/** + * mapped_kuid_user - map a user kuid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kuid : kuid to be mapped + * + * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this + * function when preparing a @kuid to be written to disk or inode. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kuid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kuid won't change when calling + * make_kuid() so we can simply retrieve the value via KUIDT_INIT() + * directly. + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is + * returned. + */ +static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kuid; + uid = from_kuid(mnt_userns, kuid); + if (uid == (uid_t)-1) + return INVALID_UID; + if (initial_idmapping(fs_userns)) + return KUIDT_INIT(uid); + return make_kuid(fs_userns, uid); +} + +/** + * mapped_kgid_user - map a user kgid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kgid : kgid to be mapped + * + * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this + * function when preparing a @kgid to be written to disk or inode. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kgid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kgid won't change when calling + * make_kgid() so we can simply retrieve the value via KGIDT_INIT() + * directly. + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is + * returned. + */ +static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + gid_t gid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kgid; + gid = from_kgid(mnt_userns, kgid); + if (gid == (gid_t)-1) + return INVALID_GID; + if (initial_idmapping(fs_userns)) + return KGIDT_INIT(gid); + return make_kgid(fs_userns, gid); +} + /** * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount + * @mnt_userns: the mount's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsuid. A common example is initializing the i_uid field of @@ -78,12 +263,12 @@ static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, */ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) { - return kuid_from_mnt(mnt_userns, current_fsuid()); + return mapped_kuid_user(mnt_userns, &init_user_ns, current_fsuid()); } /** * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount + * @mnt_userns: the mount's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsgid. A common example is initializing the i_gid field of @@ -95,7 +280,7 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) */ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) { - return kgid_from_mnt(mnt_userns, current_fsgid()); + return mapped_kgid_user(mnt_userns, &init_user_ns, current_fsgid()); } #endif /* _LINUX_MNT_IDMAPPING_H */ -- cgit v1.2.3 From 914b08b330d6722ed081e14580aae6fe66cd5946 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 3 Dec 2021 15:59:00 +0100 Subject: Bluetooth: Add hci_cmd_sync_cancel to public API After transfer errors it makes sense to cancel an ongoing synchronous command that cannot complete anymore. To permit this, export the old hci_req_sync_cancel function as hci_cmd_sync_cancel in the API. Signed-off-by: Benjamin Berg Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 0336c1bc5d25..f4034bf8f1ce 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -37,6 +37,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); +void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); -- cgit v1.2.3 From 8581fd402a0cf80b5298e3b225e7a7bd8f110e69 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Dec 2021 12:34:00 -0800 Subject: treewide: Add missing includes masked by cgroup -> bpf dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cgroup.h (therefore swap.h, therefore half of the universe) includes bpf.h which in turn includes module.h and slab.h. Since we're about to get rid of that dependency we need to clean things up. v2: drop the cpu.h include from cacheinfo.h, it's not necessary and it makes riscv sensitive to ordering of include files. Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov Reviewed-by: Christoph Hellwig Acked-by: Krzysztof Wilczyński Acked-by: Peter Chen Acked-by: SeongJae Park Acked-by: Jani Nikula Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/all/20211120035253.72074-1-kuba@kernel.org/ # v1 Link: https://lore.kernel.org/all/20211120165528.197359-1-kuba@kernel.org/ # cacheinfo discussion Link: https://lore.kernel.org/bpf/20211202203400.1208663-1-kuba@kernel.org --- include/linux/cacheinfo.h | 1 - include/linux/device/driver.h | 1 + include/linux/filter.h | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2f909ed084c6..4ff37cb763ae 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,7 +3,6 @@ #define _LINUX_CACHEINFO_H #include -#include #include #include diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index a498ebcf4993..15e7c5e15d62 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -18,6 +18,7 @@ #include #include #include +#include /** * enum probe_type - device driver probe type to try diff --git a/include/linux/filter.h b/include/linux/filter.h index 534f678ca50f..7f1e88e3e2b5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -6,6 +6,7 @@ #define __LINUX_FILTER_H__ #include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include -#include struct sk_buff; struct sock; -- cgit v1.2.3 From 4bdcd1dd4d2f973b1a89fb20ba720d879e9e506b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 28 Oct 2021 08:47:05 -0600 Subject: mm: move filemap_range_needs_writeback() into header No functional changes in this patch, just in preparation for efficiently calling this light function from the block O_DIRECT handling. Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- include/linux/pagemap.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bbf812ce89a8..6b8dc1a78df6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2847,8 +2847,6 @@ static inline int filemap_fdatawait(struct address_space *mapping) extern bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); -extern bool filemap_range_needs_writeback(struct address_space *, - loff_t lstart, loff_t lend); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); extern int __filemap_fdatawrite_range(struct address_space *mapping, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 605246452305..274a0710f2c5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -963,6 +963,35 @@ static inline int add_to_page_cache(struct page *page, int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp); +bool filemap_range_has_writeback(struct address_space *mapping, + loff_t start_byte, loff_t end_byte); + +/** + * filemap_range_needs_writeback - check if range potentially needs writeback + * @mapping: address space within which to check + * @start_byte: offset in bytes where the range starts + * @end_byte: offset in bytes where the range ends (inclusive) + * + * Find at least one page in the range supplied, usually used to check if + * direct writing in this range will trigger a writeback. Used by O_DIRECT + * read/write with IOCB_NOWAIT, to see if the caller needs to do + * filemap_write_and_wait_range() before proceeding. + * + * Return: %true if the caller should do filemap_write_and_wait_range() before + * doing O_DIRECT to a page in this range, %false otherwise. + */ +static inline bool filemap_range_needs_writeback(struct address_space *mapping, + loff_t start_byte, + loff_t end_byte) +{ + if (!mapping->nrpages) + return false; + if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && + !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + return false; + return filemap_range_has_writeback(mapping, start_byte, end_byte); +} + /** * struct readahead_control - Describes a readahead request. * -- cgit v1.2.3 From 0a467d0fdd9594fbb449ebc93852533332c528fd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 14 Oct 2021 14:39:59 -0600 Subject: block: switch to atomic_t for request references refcount_t is not as expensive as it used to be, but it's still more expensive than the io_uring method of using atomic_t and just checking for potential over/underflow. This borrows that same implementation, which in turn is based on the mm implementation from Linus. Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1b87b7c8bbff..561beb5be7ec 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -139,7 +139,7 @@ struct request { unsigned short ioprio; enum mq_rq_state state; - refcount_t ref; + atomic_t ref; unsigned long deadline; -- cgit v1.2.3 From 704b914f15fb7daaf517e3acc4bed472b50ca19e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 3 Dec 2021 21:15:32 +0800 Subject: blk-mq: move srcu from blk_mq_hw_ctx to request_queue In case of BLK_MQ_F_BLOCKING, per-hctx srcu is used to protect dispatch critical area. However, this srcu instance stays at the end of hctx, and it often takes standalone cacheline, often cold. Inside srcu_read_lock() and srcu_read_unlock(), WRITE is always done on the indirect percpu variable which is allocated from heap instead of being embedded, srcu->srcu_idx is read only in srcu_read_lock(). It doesn't matter if srcu structure stays in hctx or request queue. So switch to per-request-queue srcu for protecting dispatch, and this way simplifies quiesce a lot, not mention quiesce is always done on the request queue wide. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20211203131534.3668411-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 -------- include/linux/blkdev.h | 9 +++++++++ 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 561beb5be7ec..ecdc049b52fa 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -375,13 +374,6 @@ struct blk_mq_hw_ctx { * q->unused_hctx_list. */ struct list_head hctx_list; - - /** - * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is - * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also - * blk_mq_hw_ctx_size(). - */ - struct srcu_struct srcu[]; }; /** diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0a4416ef4fbf..c80cfaefc0a8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -16,6 +16,7 @@ #include #include #include +#include struct module; struct request_queue; @@ -373,11 +374,18 @@ struct request_queue { * devices that do not have multiple independent access ranges. */ struct blk_independent_access_ranges *ia_ranges; + + /** + * @srcu: Sleepable RCU. Use as lock when type of the request queue + * is blocking (BLK_MQ_F_BLOCKING). Must be the last member + */ + struct srcu_struct srcu[]; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ +#define QUEUE_FLAG_HAS_SRCU 2 /* SRCU is allocated */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ @@ -415,6 +423,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) +#define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) -- cgit v1.2.3 From 0cc3a8017900f856f9bf4fdc41c2b5cb1670aabe Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 2 Dec 2021 13:01:56 -0800 Subject: qed*: enhance tx timeout debug info This patch add some new qed APIs to query status block info and report various data to MFW on tx timeout event Along with that it enhances qede to dump more debug logs (not just specific to the queue which was reported by stack) on tx timeout which includes various other basic metadata about all tx queues and other info (like status block etc.) Signed-off-by: Manish Chopra Signed-off-by: Prabhakar Kushwaha Signed-off-by: Alok Prasad Signed-off-by: Ariel Elior Signed-off-by: Jakub Kicinski --- include/linux/qed/qed_if.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 0dae7fcc5ef2..9f4bfa2a4829 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -807,6 +807,12 @@ struct qed_devlink { struct devlink_health_reporter *fw_reporter; }; +struct qed_sb_info_dbg { + u32 igu_prod; + u32 igu_cons; + u16 pi[PIS_PER_SB]; +}; + struct qed_common_cb_ops { void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); void (*link_update)(void *dev, struct qed_link_output *link); @@ -1194,6 +1200,11 @@ struct qed_common_ops { struct devlink* (*devlink_register)(struct qed_dev *cdev); void (*devlink_unregister)(struct devlink *devlink); + + __printf(2, 3) void (*mfw_report)(struct qed_dev *cdev, char *fmt, ...); + + int (*get_sb_info)(struct qed_dev *cdev, struct qed_sb_info *sb, + u16 qid, struct qed_sb_info_dbg *sb_dbg); }; #define MASK_FIELD(_name, _value) \ -- cgit v1.2.3 From 823163ba6e52e644be5df4539a19e3df8d0988dd Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 2 Dec 2021 13:01:57 -0800 Subject: qed*: esl priv flag support through ethtool ESL(Enhanced System Lockdown) was designed to lock PCI adapter firmware images and prevent changes to critical non-volatile configuration data so that uncontrolled, malicious or unintentional modification to the adapters are avoided, ensuring it's operational state. Once this feature is enabled, the device is locked, rejecting any modification to non-volatile images. Once unlocked, the protection is off such that firmware and non-volatile configurations may be altered. Driver just reflects the capability and status of this through the ethtool private flag. Signed-off-by: Manish Chopra Signed-off-by: Prabhakar Kushwaha Signed-off-by: Alok Prasad Signed-off-by: Ariel Elior Signed-off-by: Jakub Kicinski --- include/linux/qed/qed_if.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 9f4bfa2a4829..6dc4943d8aec 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -652,6 +652,7 @@ struct qed_dev_info { bool wol_support; bool smart_an; + bool esl; /* MBI version */ u32 mbi_version; @@ -1205,6 +1206,8 @@ struct qed_common_ops { int (*get_sb_info)(struct qed_dev *cdev, struct qed_sb_info *sb, u16 qid, struct qed_sb_info_dbg *sb_dbg); + + int (*get_esl_status)(struct qed_dev *cdev, bool *esl_active); }; #define MASK_FIELD(_name, _value) \ -- cgit v1.2.3 From a3642021923b26d86bb27d88c826494827612c06 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 29 Nov 2021 18:46:47 +0100 Subject: locking/rtmutex: Add rt_mutex_lock_nest_lock() and rt_mutex_lock_killable(). The locking selftest for ww-mutex expects to operate directly on the base-mutex which becomes a rtmutex on PREEMPT_RT. Add a rtmutex based implementation of mutex_lock_nest_lock() and mutex_lock_killable() named rt_mutex_lock_nest_lock() abd rt_mutex_lock_killable(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211129174654.668506-5-bigeasy@linutronix.de --- include/linux/rtmutex.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 9deedfeec2b1..7d049883a08a 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -99,13 +99,22 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass); +extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock); #define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0) +#define rt_mutex_lock_nest_lock(lock, nest_lock) \ + do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ + } while (0) + #else extern void rt_mutex_lock(struct rt_mutex *lock); #define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock) +#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock) #endif extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); +extern int rt_mutex_lock_killable(struct rt_mutex *lock); extern int rt_mutex_trylock(struct rt_mutex *lock); extern void rt_mutex_unlock(struct rt_mutex *lock); -- cgit v1.2.3 From 0c1d7a2c2d32fac7ff4a644724b2d52a64184645 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 29 Nov 2021 18:46:48 +0100 Subject: lockdep: Remove softirq accounting on PREEMPT_RT. There is not really a softirq context on PREEMPT_RT. Softirqs on PREEMPT_RT are always invoked within the context of a threaded interrupt handler or within ksoftirqd. The "in-softirq" context is preemptible and is protected by a per-CPU lock to ensure mutual exclusion. There is no difference on PREEMPT_RT between spin_lock_irq() and spin_lock() because the former does not disable interrupts. Therefore if a lock is used in_softirq() and locked once with spin_lock_irq() then lockdep will report this with "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage". Teach lockdep that we don't really do softirqs on -RT. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211129174654.668506-6-bigeasy@linutronix.de --- include/linux/irqflags.h | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 600c10da321a..4b140938b03e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -71,14 +71,6 @@ do { \ do { \ __this_cpu_dec(hardirq_context); \ } while (0) -# define lockdep_softirq_enter() \ -do { \ - current->softirq_context++; \ -} while (0) -# define lockdep_softirq_exit() \ -do { \ - current->softirq_context--; \ -} while (0) # define lockdep_hrtimer_enter(__hrtimer) \ ({ \ @@ -140,6 +132,21 @@ do { \ # define lockdep_irq_work_exit(__work) do { } while (0) #endif +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT) +# define lockdep_softirq_enter() \ +do { \ + current->softirq_context++; \ +} while (0) +# define lockdep_softirq_exit() \ +do { \ + current->softirq_context--; \ +} while (0) + +#else +# define lockdep_softirq_enter() do { } while (0) +# define lockdep_softirq_exit() do { } while (0) +#endif + #if defined(CONFIG_IRQSOFF_TRACER) || \ defined(CONFIG_PREEMPT_TRACER) extern void stop_critical_timings(void); -- cgit v1.2.3 From c0bed69daf4b67809b58cc7cd81a8fa4f45bc161 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 3 Dec 2021 15:59:34 +0800 Subject: locking: Make owner_on_cpu() into Move the owner_on_cpu() from kernel/locking/rwsem.c into include/linux/sched.h with under CONFIG_SMP, then use it in the mutex/rwsem/rtmutex to simplify the code. Signed-off-by: Kefeng Wang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211203075935.136808-2-wangkefeng.wang@huawei.com --- include/linux/sched.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 78c351e35fec..ff609d9c2f21 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2171,6 +2171,15 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #endif #ifdef CONFIG_SMP +static inline bool owner_on_cpu(struct task_struct *owner) +{ + /* + * As lock holder preemption issue, we both skip spinning if + * task is not on cpu or its cpu is preempted + */ + return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); +} + /* Returns effective CPU energy utilization, as seen by the scheduler */ unsigned long sched_cpu_util(int cpu, unsigned long max); #endif /* CONFIG_SMP */ -- cgit v1.2.3 From 4cf75fd4a2545ca4deea992f929602c9fdbe8058 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 3 Dec 2021 15:59:35 +0800 Subject: locking: Mark racy reads of owner->on_cpu One of the more frequent data races reported by KCSAN is the racy read in mutex_spin_on_owner(), which is usually reported as "race of unknown origin" without showing the writer. This is due to the racing write occurring in kernel/sched. Locally enabling KCSAN in kernel/sched shows: | write (marked) to 0xffff97f205079934 of 4 bytes by task 316 on cpu 6: | finish_task kernel/sched/core.c:4632 [inline] | finish_task_switch kernel/sched/core.c:4848 | context_switch kernel/sched/core.c:4975 [inline] | __schedule kernel/sched/core.c:6253 | schedule kernel/sched/core.c:6326 | schedule_preempt_disabled kernel/sched/core.c:6385 | __mutex_lock_common kernel/locking/mutex.c:680 | __mutex_lock kernel/locking/mutex.c:740 [inline] | __mutex_lock_slowpath kernel/locking/mutex.c:1028 | mutex_lock kernel/locking/mutex.c:283 | tty_open_by_driver drivers/tty/tty_io.c:2062 [inline] | ... | | read to 0xffff97f205079934 of 4 bytes by task 322 on cpu 3: | mutex_spin_on_owner kernel/locking/mutex.c:370 | mutex_optimistic_spin kernel/locking/mutex.c:480 | __mutex_lock_common kernel/locking/mutex.c:610 | __mutex_lock kernel/locking/mutex.c:740 [inline] | __mutex_lock_slowpath kernel/locking/mutex.c:1028 | mutex_lock kernel/locking/mutex.c:283 | tty_open_by_driver drivers/tty/tty_io.c:2062 [inline] | ... | | value changed: 0x00000001 -> 0x00000000 This race is clearly intentional, and the potential for miscompilation is slim due to surrounding barrier() and cpu_relax(), and the value being used as a boolean. Nevertheless, marking this reader would more clearly denote intent and make it obvious that concurrency is expected. Use READ_ONCE() to avoid having to reason about compiler optimizations now and in future. With previous refactor, mark the read to owner->on_cpu in owner_on_cpu(), which immediately precedes the loop executing mutex_spin_on_owner(). Signed-off-by: Marco Elver Signed-off-by: Kefeng Wang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211203075935.136808-3-wangkefeng.wang@huawei.com --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index ff609d9c2f21..0b9b0e3f4791 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2177,7 +2177,7 @@ static inline bool owner_on_cpu(struct task_struct *owner) * As lock holder preemption issue, we both skip spinning if * task is not on cpu or its cpu is preempted */ - return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); + return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner)); } /* Returns effective CPU energy utilization, as seen by the scheduler */ -- cgit v1.2.3 From fb08a1908cb119a4585611d91461ab6d27756b14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Nov 2021 11:21:38 +0100 Subject: dax: simplify the dax_device <-> gendisk association Replace the dax_host_hash with an xarray indexed by the pointer value of the gendisk, and require explicitly calls from the block drivers that want to associate their gendisk with a dax_device. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20211129102203.2243509-5-hch@lst.de Signed-off-by: Dan Williams --- include/linux/dax.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 8623caa67388..e2e9a67004cb 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -11,9 +11,11 @@ typedef unsigned long dax_entry_t; +struct dax_device; +struct gendisk; struct iomap_ops; struct iomap; -struct dax_device; + struct dax_operations { /* * direct_access: translate a device-relative @@ -39,8 +41,8 @@ struct dax_operations { }; #if IS_ENABLED(CONFIG_DAX) -struct dax_device *alloc_dax(void *private, const char *host, - const struct dax_operations *ops, unsigned long flags); +struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, + unsigned long flags); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); @@ -68,7 +70,7 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, return dax_synchronous(dax_dev); } #else -static inline struct dax_device *alloc_dax(void *private, const char *host, +static inline struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, unsigned long flags) { /* @@ -107,6 +109,8 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct writeback_control; int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) +int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); +void dax_remove_host(struct gendisk *disk); bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors); @@ -128,6 +132,13 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); #else +static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) +{ + return 0; +} +static inline void dax_remove_host(struct gendisk *disk) +{ +} #define generic_fsdax_supported NULL static inline bool dax_supported(struct dax_device *dax_dev, -- cgit v1.2.3 From 7b0800d00dae8c897398abaf61e82db0d67d7afc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Nov 2021 11:21:42 +0100 Subject: dax: remove dax_capable Just open code the block size and dax_dev == NULL checks in the callers. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Reviewed-by: Gao Xiang [erofs] Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20211129102203.2243509-9-hch@lst.de Signed-off-by: Dan Williams --- include/linux/dax.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index e2e9a67004cb..439c3c70e347 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -111,12 +111,6 @@ int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); void dax_remove_host(struct gendisk *disk); -bool generic_fsdax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t sectors); - -bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len); static inline void fs_put_dax(struct dax_device *dax_dev) { @@ -139,14 +133,6 @@ static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) static inline void dax_remove_host(struct gendisk *disk) { } -#define generic_fsdax_supported NULL - -static inline bool dax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t len) -{ - return false; -} static inline void fs_put_dax(struct dax_device *dax_dev) { -- cgit v1.2.3 From 60696eb26a37ab0199f7833ddbc1b75138c36d16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Nov 2021 11:21:48 +0100 Subject: fsdax: simplify the pgoff calculation Replace the two steps of dax_iomap_sector and bdev_dax_pgoff with a single dax_iomap_pgoff helper that avoids lots of cumbersome sector conversions. Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20211129102203.2243509-15-hch@lst.de Signed-off-by: Dan Williams --- include/linux/dax.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 439c3c70e347..324363b798ec 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -107,7 +107,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, #endif struct writeback_control; -int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); void dax_remove_host(struct gendisk *disk); -- cgit v1.2.3 From c6f40468657d16e4010ef84bf32a761feb3469ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Nov 2021 11:21:52 +0100 Subject: fsdax: decouple zeroing from the iomap buffered I/O code Unshare the DAX and iomap buffered I/O page zeroing code. This code previously did a IS_DAX check deep inside the iomap code, which in fact was the only DAX check in the code. Instead move these checks into the callers. Most callers already have DAX special casing anyway and XFS will need it for reflink support as well. Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20211129102203.2243509-19-hch@lst.de Signed-off-by: Dan Williams --- include/linux/dax.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 324363b798ec..b79036743e7f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -14,6 +14,7 @@ typedef unsigned long dax_entry_t; struct dax_device; struct gendisk; struct iomap_ops; +struct iomap_iter; struct iomap; struct dax_operations { @@ -170,6 +171,11 @@ static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) } #endif +int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, + const struct iomap_ops *ops); +int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, + const struct iomap_ops *ops); + #if IS_ENABLED(CONFIG_DAX) int dax_read_lock(void); void dax_read_unlock(int id); @@ -204,7 +210,6 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); -s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); -- cgit v1.2.3 From 952da06375c8f3aa58474fff718d9ae8442531b9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Nov 2021 11:21:58 +0100 Subject: iomap: add a IOMAP_DAX flag Add a flag so that the file system can easily detect DAX operations based just on the iomap operation requested instead of looking at inode state using IS_DAX. This will be needed to apply the to be added partition offset only for operations that actually use DAX, but not things like fiemap that are based on the block device. In the long run it should also allow turning the bdev, dax_dev and inline_data into a union. Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20211129102203.2243509-25-hch@lst.de Signed-off-by: Dan Williams --- include/linux/iomap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6d1b08d0ae93..5b9432f9f79e 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -141,6 +141,11 @@ struct iomap_page_ops { #define IOMAP_NOWAIT (1 << 5) /* do not block */ #define IOMAP_OVERWRITE_ONLY (1 << 6) /* only pure overwrites allowed */ #define IOMAP_UNSHARE (1 << 7) /* unshare_file_range */ +#ifdef CONFIG_FS_DAX +#define IOMAP_DAX (1 << 8) /* DAX mapping */ +#else +#define IOMAP_DAX 0 +#endif /* CONFIG_FS_DAX */ struct iomap_ops { /* -- cgit v1.2.3 From cd913c76f489def1a388e3a5b10df94948ede3f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Nov 2021 11:21:59 +0100 Subject: dax: return the partition offset from fs_dax_get_by_bdev Prepare for the removal of the block_device from the DAX I/O path by returning the partition offset from fs_dax_get_by_bdev so that the file systems have it at hand for use during I/O. Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20211129102203.2243509-26-hch@lst.de Signed-off-by: Dan Williams --- include/linux/dax.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index b79036743e7f..f6f353382cc9 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -117,7 +117,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev) put_dax(dax_dev); } -struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, + u64 *start_off); int dax_writeback_mapping_range(struct address_space *mapping, struct dax_device *dax_dev, struct writeback_control *wbc); @@ -138,7 +139,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev) { } -static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) +static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, + u64 *start_off) { return NULL; } -- cgit v1.2.3 From 2ede892342b3c628991ff1b9060108a7edd92d94 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Nov 2021 11:22:01 +0100 Subject: dax: fix up some of the block device related ifdefs The DAX device <-> block device association is only enabled if CONFIG_BLOCK is enabled. Update dax.h to account for that and use the right conditions for the fs_put_dax stub as well. Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20211129102203.2243509-28-hch@lst.de Signed-off-by: Dan Williams --- include/linux/dax.h | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index f6f353382cc9..87ae4c9b1d65 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -108,24 +108,15 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, #endif struct writeback_control; -#if IS_ENABLED(CONFIG_FS_DAX) +#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); void dax_remove_host(struct gendisk *disk); - +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, + u64 *start_off); static inline void fs_put_dax(struct dax_device *dax_dev) { put_dax(dax_dev); } - -struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, - u64 *start_off); -int dax_writeback_mapping_range(struct address_space *mapping, - struct dax_device *dax_dev, struct writeback_control *wbc); - -struct page *dax_layout_busy_page(struct address_space *mapping); -struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); -dax_entry_t dax_lock_page(struct page *page); -void dax_unlock_page(struct page *page, dax_entry_t cookie); #else static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) { @@ -134,17 +125,25 @@ static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) static inline void dax_remove_host(struct gendisk *disk) { } - -static inline void fs_put_dax(struct dax_device *dax_dev) -{ -} - static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off) { return NULL; } +static inline void fs_put_dax(struct dax_device *dax_dev) +{ +} +#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ +#if IS_ENABLED(CONFIG_FS_DAX) +int dax_writeback_mapping_range(struct address_space *mapping, + struct dax_device *dax_dev, struct writeback_control *wbc); + +struct page *dax_layout_busy_page(struct address_space *mapping); +struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); +dax_entry_t dax_lock_page(struct page *page); +void dax_unlock_page(struct page *page, dax_entry_t cookie); +#else static inline struct page *dax_layout_busy_page(struct address_space *mapping) { return NULL; -- cgit v1.2.3 From 866de407444398bc8140ea70de1dba5f91cc34ac Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 3 Dec 2021 13:30:01 +0800 Subject: bpf: Disallow BPF_LOG_KERNEL log level for bpf(BPF_BTF_LOAD) BPF_LOG_KERNEL is only used internally, so disallow bpf_btf_load() to set log level as BPF_LOG_KERNEL. The same checking has already been done in bpf_check(), so factor out a helper to check the validity of log attributes and use it in both places. Fixes: 8580ac9404f6 ("bpf: Process in-kernel BTF") Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211203053001.740945-1-houtao1@huawei.com --- include/linux/bpf_verifier.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c8a78e830fca..182b16a91084 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -396,6 +396,13 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) log->level == BPF_LOG_KERNEL); } +static inline bool +bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) +{ + return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 && + log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); +} + #define BPF_MAX_SUBPROGS 256 struct bpf_subprog_info { -- cgit v1.2.3 From ccf7cf92373d1a53166582013430b3b9c05a6ba2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Jul 2021 09:39:15 -0500 Subject: f2fs: fix the f2fs_file_write_iter tracepoint Pass in the original position and count rather than the position and count that were updated by the write. Also use the correct types for all arguments, in particular the file offset which was being truncated to 32 bits on 32-bit platforms. Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index f8cb916f3595..dcb94d740e12 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -540,17 +540,17 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, TRACE_EVENT(f2fs_file_write_iter, - TP_PROTO(struct inode *inode, unsigned long offset, - unsigned long length, int ret), + TP_PROTO(struct inode *inode, loff_t offset, size_t length, + ssize_t ret), TP_ARGS(inode, offset, length, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(unsigned long, offset) - __field(unsigned long, length) - __field(int, ret) + __field(loff_t, offset) + __field(size_t, length) + __field(ssize_t, ret) ), TP_fast_assign( @@ -562,7 +562,7 @@ TRACE_EVENT(f2fs_file_write_iter, ), TP_printk("dev = (%d,%d), ino = %lu, " - "offset = %lu, length = %lu, written(err) = %d", + "offset = %lld, length = %zu, written(err) = %zd", show_dev_ino(__entry), __entry->offset, __entry->length, -- cgit v1.2.3 From b80892ca022e9eb484771a66eb68e12364695a2a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Oct 2021 17:10:17 +0200 Subject: memremap: remove support for external pgmap refcounts No driver is left using the external pgmap refcount, so remove the code to support it. Signed-off-by: Christoph Hellwig Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20211028151017.50234-1-hch@lst.de Signed-off-by: Dan Williams --- include/linux/memremap.h | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index c0e9d35889e8..a8bc588fe7aa 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -72,16 +72,6 @@ struct dev_pagemap_ops { */ void (*page_free)(struct page *page); - /* - * Transition the refcount in struct dev_pagemap to the dead state. - */ - void (*kill)(struct dev_pagemap *pgmap); - - /* - * Wait for refcount in struct dev_pagemap to be idle and reap it. - */ - void (*cleanup)(struct dev_pagemap *pgmap); - /* * Used for private (un-addressable) device memory only. Must migrate * the page back to a CPU accessible page. @@ -95,8 +85,7 @@ struct dev_pagemap_ops { * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping - * @internal_ref: internal reference if @ref is not provided by the caller - * @done: completion for @internal_ref + * @done: completion for @ref * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table @@ -109,8 +98,7 @@ struct dev_pagemap_ops { */ struct dev_pagemap { struct vmem_altmap altmap; - struct percpu_ref *ref; - struct percpu_ref internal_ref; + struct percpu_ref ref; struct completion done; enum memory_type type; unsigned int flags; @@ -191,7 +179,7 @@ static inline unsigned long memremap_compat_align(void) static inline void put_dev_pagemap(struct dev_pagemap *pgmap) { if (pgmap) - percpu_ref_put(pgmap->ref); + percpu_ref_put(&pgmap->ref); } #endif /* _LINUX_MEMREMAP_H_ */ -- cgit v1.2.3 From 02e4079913500f24ceb082d8d87d8665f044b298 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 3 Dec 2021 12:17:04 +0100 Subject: fs: remove unused low-level mapping helpers Now that we ported all places to use the new low-level mapping helpers that are able to support filesystems mounted with an idmapping we can remove the old low-level mapping helpers. With the removal of these old helpers we also conclude the renaming of the mapping helpers we started in commit a65e58e791a1 ("fs: document and rename fsid helpers"). Link: https://lore.kernel.org/r/20211123114227.3124056-8-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-8-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-8-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner --- include/linux/mnt_idmapping.h | 56 ------------------------------------------- 1 file changed, 56 deletions(-) (limited to 'include') diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 60341cd33ccc..0c6ab3f4c952 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -13,62 +13,6 @@ struct user_namespace; */ extern struct user_namespace init_user_ns; -/** - * kuid_into_mnt - map a kuid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return make_kuid(mnt_userns, __kuid_val(kuid)); -} - -/** - * kgid_into_mnt - map a kgid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return make_kgid(mnt_userns, __kgid_val(kgid)); -} - -/** - * kuid_from_mnt - map a kuid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped up according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return KUIDT_INIT(from_kuid(mnt_userns, kuid)); -} - -/** - * kgid_from_mnt - map a kgid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped up according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return KGIDT_INIT(from_kgid(mnt_userns, kgid)); -} - /** * initial_idmapping - check whether this is the initial mapping * @ns: idmapping to check -- cgit v1.2.3 From 209188ce75d0d357c292f6bb81d712acdd4e7db7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 3 Dec 2021 12:17:05 +0100 Subject: fs: port higher-level mapping helpers Enable the mapped_fs{g,u}id() helpers to support filesystems mounted with an idmapping. Apart from core mapping helpers that use mapped_fs{g,u}id() to initialize struct inode's i_{g,u}id fields xfs is the only place that uses these low-level helpers directly. The patch only extends the helpers to be able to take the filesystem idmapping into account. Since we don't actually yet pass the filesystem's idmapping in no functional changes happen. This will happen in a final patch. Link: https://lore.kernel.org/r/20211123114227.3124056-9-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-9-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-9-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner --- include/linux/fs.h | 8 ++++---- include/linux/mnt_idmapping.h | 12 ++++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 57aee6ebba72..e1f28f757f1b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1664,7 +1664,7 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, static inline void inode_fsuid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_uid = mapped_fsuid(mnt_userns); + inode->i_uid = mapped_fsuid(mnt_userns, &init_user_ns); } /** @@ -1678,7 +1678,7 @@ static inline void inode_fsuid_set(struct inode *inode, static inline void inode_fsgid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_gid = mapped_fsgid(mnt_userns); + inode->i_gid = mapped_fsgid(mnt_userns, &init_user_ns); } /** @@ -1699,10 +1699,10 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kuid_t kuid; kgid_t kgid; - kuid = mapped_fsuid(mnt_userns); + kuid = mapped_fsuid(mnt_userns, &init_user_ns); if (!uid_valid(kuid)) return false; - kgid = mapped_fsgid(mnt_userns); + kgid = mapped_fsgid(mnt_userns, &init_user_ns); if (!gid_valid(kgid)) return false; return kuid_has_mapping(fs_userns, kuid) && diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 0c6ab3f4c952..ee5a217de2a8 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -196,6 +196,7 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, /** * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsuid. A common example is initializing the i_uid field of @@ -205,14 +206,16 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, * * Return: the caller's current fsuid mapped up according to @mnt_userns. */ -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) +static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) { - return mapped_kuid_user(mnt_userns, &init_user_ns, current_fsuid()); + return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid()); } /** * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsgid. A common example is initializing the i_gid field of @@ -222,9 +225,10 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) * * Return: the caller's current fsgid mapped up according to @mnt_userns. */ -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) +static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) { - return mapped_kgid_user(mnt_userns, &init_user_ns, current_fsgid()); + return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid()); } #endif /* _LINUX_MNT_IDMAPPING_H */ -- cgit v1.2.3 From a1ec9040a2a9122605ac26e5725c6de019184419 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 3 Dec 2021 12:17:06 +0100 Subject: fs: add i_user_ns() helper Since we'll be passing the filesystem's idmapping in even more places in the following patches and we do already dereference struct inode to get to the filesystem's idmapping multiple times add a tiny helper. Link: https://lore.kernel.org/r/20211123114227.3124056-10-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-10-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-10-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner --- include/linux/fs.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e1f28f757f1b..3d6d514943ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1600,6 +1600,11 @@ struct super_block { struct list_head s_inodes_wb; /* writeback inodes */ } __randomize_layout; +static inline struct user_namespace *i_user_ns(const struct inode *inode) +{ + return inode->i_sb->s_user_ns; +} + /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can * instead deal with the raw numeric values that are stored @@ -1607,22 +1612,22 @@ struct super_block { */ static inline uid_t i_uid_read(const struct inode *inode) { - return from_kuid(inode->i_sb->s_user_ns, inode->i_uid); + return from_kuid(i_user_ns(inode), inode->i_uid); } static inline gid_t i_gid_read(const struct inode *inode) { - return from_kgid(inode->i_sb->s_user_ns, inode->i_gid); + return from_kgid(i_user_ns(inode), inode->i_gid); } static inline void i_uid_write(struct inode *inode, uid_t uid) { - inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid); + inode->i_uid = make_kuid(i_user_ns(inode), uid); } static inline void i_gid_write(struct inode *inode, gid_t gid) { - inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); + inode->i_gid = make_kgid(i_user_ns(inode), gid); } /** -- cgit v1.2.3 From bd303368b776eead1c29e6cdda82bde7128b82a7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 3 Dec 2021 12:17:07 +0100 Subject: fs: support mapped mounts of mapped filesystems In previous patches we added new and modified existing helpers to handle idmapped mounts of filesystems mounted with an idmapping. In this final patch we convert all relevant places in the vfs to actually pass the filesystem's idmapping into these helpers. With this the vfs is in shape to handle idmapped mounts of filesystems mounted with an idmapping. Note that this is just the generic infrastructure. Actually adding support for idmapped mounts to a filesystem mountable with an idmapping is follow-up work. In this patch we extend the definition of an idmapped mount from a mount that that has the initial idmapping attached to it to a mount that has an idmapping attached to it which is not the same as the idmapping the filesystem was mounted with. As before we do not allow the initial idmapping to be attached to a mount. In addition this patch prevents that the idmapping the filesystem was mounted with can be attached to a mount created based on this filesystem. This has multiple reasons and advantages. First, attaching the initial idmapping or the filesystem's idmapping doesn't make much sense as in both cases the values of the i_{g,u}id and other places where k{g,u}ids are used do not change. Second, a user that really wants to do this for whatever reason can just create a separate dedicated identical idmapping to attach to the mount. Third, we can continue to use the initial idmapping as an indicator that a mount is not idmapped allowing us to continue to keep passing the initial idmapping into the mapping helpers to tell them that something isn't an idmapped mount even if the filesystem is mounted with an idmapping. Link: https://lore.kernel.org/r/20211123114227.3124056-11-brauner@kernel.org (v1) Link: https://lore.kernel.org/r/20211130121032.3753852-11-brauner@kernel.org (v2) Link: https://lore.kernel.org/r/20211203111707.3901969-11-brauner@kernel.org Cc: Seth Forshee Cc: Amir Goldstein Cc: Christoph Hellwig Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner --- include/linux/fs.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3d6d514943ab..493b87e3616b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1641,7 +1641,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return mapped_kuid_fs(mnt_userns, &init_user_ns, inode->i_uid); + return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid); } /** @@ -1655,7 +1655,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return mapped_kgid_fs(mnt_userns, &init_user_ns, inode->i_gid); + return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid); } /** @@ -1669,7 +1669,7 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, static inline void inode_fsuid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_uid = mapped_fsuid(mnt_userns, &init_user_ns); + inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode)); } /** @@ -1683,7 +1683,7 @@ static inline void inode_fsuid_set(struct inode *inode, static inline void inode_fsgid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_gid = mapped_fsgid(mnt_userns, &init_user_ns); + inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode)); } /** @@ -1704,10 +1704,10 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kuid_t kuid; kgid_t kgid; - kuid = mapped_fsuid(mnt_userns, &init_user_ns); + kuid = mapped_fsuid(mnt_userns, fs_userns); if (!uid_valid(kuid)) return false; - kgid = mapped_fsgid(mnt_userns, &init_user_ns); + kgid = mapped_fsgid(mnt_userns, fs_userns); if (!gid_valid(kgid)) return false; return kuid_has_mapping(fs_userns, kuid) && @@ -2653,13 +2653,14 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) * is_idmapped_mnt - check whether a mount is mapped * @mnt: the mount to check * - * If @mnt has an idmapping attached to it @mnt is mapped. + * If @mnt has an idmapping attached different from the + * filesystem's idmapping then @mnt is mapped. * * Return: true if mount is mapped, false if not. */ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) { - return mnt_user_ns(mnt) != &init_user_ns; + return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns; } extern long vfs_truncate(const struct path *, loff_t); -- cgit v1.2.3 From 019cd8a9e3bcbbf6bac8036a9ae545f7858e0c08 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 2 Nov 2021 16:02:01 -0600 Subject: ARM: ixp4xx: remove unused header file pata_ixp4xx_cf.h Commit b00ced38e317 ("ARM: ixp4xx: Delete Avila boardfiles") removed the last use of but left the header file in place. Nothing uses this file, delete it now. Cc: Linus Walleij Cc: Arnd Bergmann Signed-off-by: Jonathan Corbet Acked-by: Arnd Bergmann Signed-off-by: Linus Walleij --- include/linux/platform_data/pata_ixp4xx_cf.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/linux/platform_data/pata_ixp4xx_cf.h (limited to 'include') diff --git a/include/linux/platform_data/pata_ixp4xx_cf.h b/include/linux/platform_data/pata_ixp4xx_cf.h deleted file mode 100644 index e60fa41da4a5..000000000000 --- a/include/linux/platform_data/pata_ixp4xx_cf.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PLATFORM_DATA_PATA_IXP4XX_H -#define __PLATFORM_DATA_PATA_IXP4XX_H - -#include - -/* - * This structure provide a means for the board setup code - * to give information to th pata_ixp4xx driver. It is - * passed as platform_data. - */ -struct ixp4xx_pata_data { - volatile u32 *cs0_cfg; - volatile u32 *cs1_cfg; - unsigned long cs0_bits; - unsigned long cs1_bits; - void __iomem *cmd; - void __iomem *ctl; -}; - -#endif -- cgit v1.2.3 From 94aedac49d92b22995d7b9092c6551b8b9924320 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 4 Nov 2021 09:16:20 +0200 Subject: iommu: Log iova range in map/unmap trace events In case of an iommu page fault, the faulting iova is logged in trace_io_page_fault. It is therefore convenient to log the iova range in mapping/unmapping trace events so that it is easier to see if the faulting iova was recently in any of those ranges. Signed-off-by: Dafna Hirschfeld Link: https://lore.kernel.org/r/20211104071620.27290-1-dafna.hirschfeld@collabora.com Signed-off-by: Joerg Roedel --- include/trace/events/iommu.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index 72b4582322ff..29096fe12623 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -101,8 +101,9 @@ TRACE_EVENT(map, __entry->size = size; ), - TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=%zu", - __entry->iova, __entry->paddr, __entry->size + TP_printk("IOMMU: iova=0x%016llx - 0x%016llx paddr=0x%016llx size=%zu", + __entry->iova, __entry->iova + __entry->size, __entry->paddr, + __entry->size ) ); @@ -124,8 +125,9 @@ TRACE_EVENT(unmap, __entry->unmapped_size = unmapped_size; ), - TP_printk("IOMMU: iova=0x%016llx size=%zu unmapped_size=%zu", - __entry->iova, __entry->size, __entry->unmapped_size + TP_printk("IOMMU: iova=0x%016llx - 0x%016llx size=%zu unmapped_size=%zu", + __entry->iova, __entry->iova + __entry->size, + __entry->size, __entry->unmapped_size ) ); -- cgit v1.2.3 From 063ebb19d962b45a1b505748d464bd12b5074797 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 1 Dec 2021 17:33:21 +0000 Subject: iommu/virtio: Add definitions for VIRTIO_IOMMU_F_BYPASS_CONFIG Add definitions for the VIRTIO_IOMMU_F_BYPASS_CONFIG, which supersedes VIRTIO_IOMMU_F_BYPASS. Reviewed-by: Kevin Tian Signed-off-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20211201173323.1045819-2-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- include/uapi/linux/virtio_iommu.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index 237e36a280cb..1ff357f0d72e 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -16,6 +16,7 @@ #define VIRTIO_IOMMU_F_BYPASS 3 #define VIRTIO_IOMMU_F_PROBE 4 #define VIRTIO_IOMMU_F_MMIO 5 +#define VIRTIO_IOMMU_F_BYPASS_CONFIG 6 struct virtio_iommu_range_64 { __le64 start; @@ -36,6 +37,8 @@ struct virtio_iommu_config { struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ __le32 probe_size; + __u8 bypass; + __u8 reserved[3]; }; /* Request types */ @@ -66,11 +69,14 @@ struct virtio_iommu_req_tail { __u8 reserved[3]; }; +#define VIRTIO_IOMMU_ATTACH_F_BYPASS (1 << 0) + struct virtio_iommu_req_attach { struct virtio_iommu_req_head head; __le32 domain; __le32 endpoint; - __u8 reserved[8]; + __le32 flags; + __u8 reserved[4]; struct virtio_iommu_req_tail tail; }; -- cgit v1.2.3 From 52f982f00b220d097a71a23c149a1d18efc08e63 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Mon, 6 Dec 2021 14:24:06 +0100 Subject: security,selinux: remove security_add_mnt_opt() Its last user has been removed in commit f2aedb713c28 ("NFS: Add fs_context support."). Signed-off-by: Ondrej Mosnacek Reviewed-by: Casey Schaufler Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 -- include/linux/lsm_hooks.h | 2 -- include/linux/security.h | 8 -------- 3 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ae2228f0711d..a5a724c308d8 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -78,8 +78,6 @@ LSM_HOOK(int, 0, sb_set_mnt_opts, struct super_block *sb, void *mnt_opts, LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb, struct super_block *newsb, unsigned long kern_flags, unsigned long *set_kern_flags) -LSM_HOOK(int, 0, sb_add_mnt_opt, const char *option, const char *val, - int len, void **mnt_opts) LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 52c1990644b9..3bf5c658bc44 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -180,8 +180,6 @@ * Copy all security options from a given superblock to another * @oldsb old superblock which contain information to clone * @newsb new superblock which needs filled in - * @sb_add_mnt_opt: - * Add one mount @option to @mnt_opts. * @sb_parse_opts_str: * Parse a string of security data filling in the opts structure * @options string containing all mount options known by the LSM diff --git a/include/linux/security.h b/include/linux/security.h index bb301963e333..6d72772182c8 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -313,8 +313,6 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb, unsigned long kern_flags, unsigned long *set_kern_flags); -int security_add_mnt_opt(const char *option, const char *val, - int len, void **mnt_opts); int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, @@ -711,12 +709,6 @@ static inline int security_sb_clone_mnt_opts(const struct super_block *oldsb, return 0; } -static inline int security_add_mnt_opt(const char *option, const char *val, - int len, void **mnt_opts) -{ - return 0; -} - static inline int security_move_mount(const struct path *from_path, const struct path *to_path) { -- cgit v1.2.3 From 8ab30a331946c34e4ba022c44df8624acea1c74e Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 6 Dec 2021 20:49:48 +0800 Subject: blk-mq: Drop busy_iter_fn blk_mq_hw_ctx argument The only user of blk_mq_hw_ctx blk_mq_hw_ctx argument is blk_mq_rq_inflight(). Function blk_mq_rq_inflight() uses the hctx to find the associated request queue to match against the request. However this same check is already done in caller bt_iter(), so drop this check. With that change there are no more users of busy_iter_fn blk_mq_hw_ctx argument, so drop the argument. Reviewed-by Hannes Reinecke Signed-off-by: John Garry Reviewed-by: Ming Lei Tested-by: Kashyap Desai Link: https://lore.kernel.org/r/1638794990-137490-2-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ecdc049b52fa..17ebf29e42d8 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -470,8 +470,7 @@ struct blk_mq_queue_data { bool last; }; -typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, - bool); +typedef bool (busy_iter_fn)(struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); /** -- cgit v1.2.3 From fc39f8d2d1c10ac04976b0a247865bb0cec4dd88 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 6 Dec 2021 20:49:49 +0800 Subject: blk-mq: Delete busy_iter_fn Typedefs busy_iter_fn and busy_tag_iter_fn are now identical, so delete busy_iter_fn to reduce duplication. It would be nicer to delete busy_tag_iter_fn, as the name busy_iter_fn is less specific. However busy_tag_iter_fn is used in many different parts of the tree, unlike busy_iter_fn which is just use in block/, so just take the straightforward path now, so that we could rename later treewide. Signed-off-by: John Garry Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Tested-by: Kashyap Desai Link: https://lore.kernel.org/r/1638794990-137490-3-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 17ebf29e42d8..772f8f921526 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -470,7 +470,6 @@ struct blk_mq_queue_data { bool last; }; -typedef bool (busy_iter_fn)(struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); /** -- cgit v1.2.3 From 05770dd0ad110854c7157d95700d7c89979cdb3e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 22 Nov 2021 18:30:12 +0900 Subject: tracing: Support __rel_loc relative dynamic data location attribute Add '__rel_loc' new dynamic data location attribute which encodes the data location from the next to the field itself. The '__data_loc' is used for encoding the dynamic data location on the trace event record. But '__data_loc' is not useful if the writer doesn't know the event header (e.g. user event), because it records the dynamic data offset from the entry of the record, not the field itself. This new '__rel_loc' attribute encodes the data location relatively from the next of the field. For example, when there is a record like below (the number in the parentheses is the size of fields) |header(N)|common(M)|fields(K)|__data_loc(4)|fields(L)|data(G)| In this case, '__data_loc' field will be __data_loc = (G << 16) | (N+M+K+4+L) If '__rel_loc' is used, this will be |header(N)|common(M)|fields(K)|__rel_loc(4)|fields(L)|data(G)| where __rel_loc = (G << 16) | (L) This case shows L bytes after the '__rel_loc' attribute field, if there is no fields after the __rel_loc field, L must be 0. This is relatively easy (and no need to consider the kernel header change) when the event data fields are composed by user who doesn't know header and common fields. Link: https://lkml.kernel.org/r/163757341258.510314.4214431827833229956.stgit@devnote2 Cc: Beau Belgrave Cc: Namhyung Kim Cc: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2d167ac3452c..3900404aa063 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -782,6 +782,7 @@ enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, FILTER_DYN_STRING, + FILTER_RDYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, FILTER_COMM, -- cgit v1.2.3 From 55de2c0b5610cba5a5a93c0788031133c457e689 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 22 Nov 2021 18:30:21 +0900 Subject: tracing: Add '__rel_loc' using trace event macros Add '__rel_loc' using trace event macros. These macros are usually not used in the kernel, except for testing purpose. This also add "rel_" variant of macros for dynamic_array string, and bitmask. Link: https://lkml.kernel.org/r/163757342119.510314.816029622439099016.stgit@devnote2 Cc: Beau Belgrave Cc: Namhyung Kim Cc: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/trace/bpf_probe.h | 16 ++++++ include/trace/perf.h | 16 ++++++ include/trace/trace_events.h | 120 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 150 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index a8e97f84b652..7660a7846586 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -21,6 +21,22 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)(&__entry->__rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + #undef __perf_count #define __perf_count(c) (c) diff --git a/include/trace/perf.h b/include/trace/perf.h index dbc6c74defc3..ea4405de175a 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -21,6 +21,22 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)(&__entry->__rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + #undef __perf_count #define __perf_count(c) (__count = (c)) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 08810a463880..8c6f7c433518 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -108,6 +108,18 @@ TRACE_MAKE_SYSTEM_STR(); #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) + #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -200,11 +212,23 @@ TRACE_MAKE_SYSTEM_STR(); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -293,6 +317,19 @@ TRACE_MAKE_SYSTEM_STR(); #undef __get_str #define __get_str(field) ((char *)__get_dynamic_array(field)) +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)(&__entry->__rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + #undef __get_bitmask #define __get_bitmask(field) \ ({ \ @@ -302,6 +339,15 @@ TRACE_MAKE_SYSTEM_STR(); trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ }) +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) \ + ({ \ + void *__bitmask = __get_rel_dynamic_array(field); \ + unsigned int __bitmask_size; \ + __bitmask_size = __get_rel_dynamic_array_len(field); \ + trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + }) + #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ ({ \ @@ -471,6 +517,21 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(_type, _item, _len) { \ + .type = "__rel_loc " #_type "[]", .name = #_item, \ + .size = 4, .align = 4, \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static struct trace_event_fields trace_event_fields_##call[] = { \ @@ -519,6 +580,22 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __item_length = (len) * sizeof(type); \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data) - \ + offsetof(typeof(*entry), __rel_loc_##item) - \ + sizeof(u32); \ + __data_offsets->item |= __item_length << 16; \ + __data_size += __item_length; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, \ + strlen((src) ? (const char *)(src) : "(null)") + 1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) /* * __bitmask_size_in_bytes_raw is the number of bytes needed to hold * num_possible_cpus(). @@ -542,6 +619,10 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ __bitmask_size_in_longs(nr_bits)) +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ + __bitmask_size_in_longs(nr_bits)) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline notrace int trace_event_get_offsets_##call( \ @@ -706,6 +787,37 @@ static inline notrace int trace_event_get_offsets_##call( \ #define __assign_bitmask(dst, src, nr_bits) \ memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __entry->__rel_loc_##item = __data_offsets.item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __assign_rel_str +#define __assign_rel_str(dst, src) \ + strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); + +#undef __assign_rel_str_len +#define __assign_rel_str_len(dst, src, len) \ + do { \ + memcpy(__get_rel_str(dst), (src), (len)); \ + __get_rel_str(dst)[len] = '\0'; \ + } while (0) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + +#undef __assign_rel_bitmask +#define __assign_rel_bitmask(dst, src, nr_bits) \ + memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) + #undef TP_fast_assign #define TP_fast_assign(args...) args @@ -770,6 +882,10 @@ static inline void ftrace_test_probe_##call(void) \ #undef __get_dynamic_array_len #undef __get_str #undef __get_bitmask +#undef __get_rel_dynamic_array +#undef __get_rel_dynamic_array_len +#undef __get_rel_str +#undef __get_rel_bitmask #undef __print_array #undef __print_hex_dump -- cgit v1.2.3 From 8c33915d77a565b8b5d44e6368e22b6ea300b7a8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 28 Nov 2021 20:00:29 +0100 Subject: platform/x86: wmi: Add no_notify_data flag to struct wmi_driver Some WMI implementations do notifies on WMI objects without a _WED method allow WMI drivers to indicate that _WED should not be called for notifies on the WMI objects the driver is bound to. Instead the driver's notify callback will simply be called with a NULL data argument. Reported-by: Yauhen Kharuzhy Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211128190031.405620-3-hdegoede@redhat.com --- include/linux/wmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 2cb3913c1f50..b88d7b58e61e 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -35,6 +35,7 @@ extern int set_required_buffer_size(struct wmi_device *wdev, u64 length); struct wmi_driver { struct device_driver driver; const struct wmi_device_id *id_table; + bool no_notify_data; int (*probe)(struct wmi_device *wdev, const void *context); void (*remove)(struct wmi_device *wdev); -- cgit v1.2.3 From 4e66934eaadc83b27ada8d42b60894018f3bfabf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:21:55 -0800 Subject: lib: add reference counting tracking infrastructure It can be hard to track where references are taken and released. In networking, we have annoying issues at device or netns dismantles, and we had various proposals to ease root causing them. This patch adds new infrastructure pairing refcount increases and decreases. This will self document code, because programmers will have to associate increments/decrements. This is controled by CONFIG_REF_TRACKER which can be selected by users of this feature. This adds both cpu and memory costs, and thus should probably be used with care. Signed-off-by: Eric Dumazet Reviewed-by: Dmitry Vyukov Signed-off-by: Jakub Kicinski --- include/linux/ref_tracker.h | 73 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 include/linux/ref_tracker.h (limited to 'include') diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h new file mode 100644 index 000000000000..c11c9db5825c --- /dev/null +++ b/include/linux/ref_tracker.h @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#ifndef _LINUX_REF_TRACKER_H +#define _LINUX_REF_TRACKER_H +#include +#include +#include + +struct ref_tracker; + +struct ref_tracker_dir { +#ifdef CONFIG_REF_TRACKER + spinlock_t lock; + unsigned int quarantine_avail; + refcount_t untracked; + struct list_head list; /* List of active trackers */ + struct list_head quarantine; /* List of dead trackers */ +#endif +}; + +#ifdef CONFIG_REF_TRACKER +static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, + unsigned int quarantine_count) +{ + INIT_LIST_HEAD(&dir->list); + INIT_LIST_HEAD(&dir->quarantine); + spin_lock_init(&dir->lock); + dir->quarantine_avail = quarantine_count; + refcount_set(&dir->untracked, 1); +} + +void ref_tracker_dir_exit(struct ref_tracker_dir *dir); + +void ref_tracker_dir_print(struct ref_tracker_dir *dir, + unsigned int display_limit); + +int ref_tracker_alloc(struct ref_tracker_dir *dir, + struct ref_tracker **trackerp, gfp_t gfp); + +int ref_tracker_free(struct ref_tracker_dir *dir, + struct ref_tracker **trackerp); + +#else /* CONFIG_REF_TRACKER */ + +static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, + unsigned int quarantine_count) +{ +} + +static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir) +{ +} + +static inline void ref_tracker_dir_print(struct ref_tracker_dir *dir, + unsigned int display_limit) +{ +} + +static inline int ref_tracker_alloc(struct ref_tracker_dir *dir, + struct ref_tracker **trackerp, + gfp_t gfp) +{ + return 0; +} + +static inline int ref_tracker_free(struct ref_tracker_dir *dir, + struct ref_tracker **trackerp) +{ + return 0; +} + +#endif + +#endif /* _LINUX_REF_TRACKER_H */ -- cgit v1.2.3 From 4d92b95ff2f95f13df9bad0b5a25a9f60e72758d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:21:57 -0800 Subject: net: add net device refcount tracker infrastructure net device are refcounted. Over the years we had numerous bugs caused by imbalanced dev_hold() and dev_put() calls. The general idea is to be able to precisely pair each decrement with a corresponding prior increment. Both share a cookie, basically a pointer to private data storing stack traces. This patch adds dev_hold_track() and dev_put_track(). To use these helpers, each data structure owning a refcount should also use a "netdevice_tracker" to pair the hold and put. netdevice_tracker dev_tracker; ... dev_hold_track(dev, &dev_tracker, GFP_ATOMIC); ... dev_put_track(dev, &dev_tracker); Whenever a leak happens, we will get precise stack traces of the point dev_hold_track() happened, at device dismantle phase. We will also get a stack trace if too many dev_put_track() for the same netdevice_tracker are attempted. This is guarded by CONFIG_NET_DEV_REFCNT_TRACKER option. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 65117f01d5f2..143d60ed0047 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -48,6 +48,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -300,6 +301,12 @@ enum netdev_state_t { }; +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER +typedef struct ref_tracker *netdevice_tracker; +#else +typedef struct {} netdevice_tracker; +#endif + struct gro_list { struct list_head list; int count; @@ -1865,6 +1872,7 @@ enum netdev_ml_priv_type { * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device * @dev_refcnt: Number of references to this device + * @refcnt_tracker: Tracker directory for tracked references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one * @@ -2178,6 +2186,7 @@ struct net_device { #else refcount_t dev_refcnt; #endif + struct ref_tracker_dir refcnt_tracker; struct list_head link_watch_list; @@ -3805,6 +3814,7 @@ void netdev_run_todo(void); * @dev: network device * * Release reference to device to allow it to be freed. + * Try using dev_put_track() instead. */ static inline void dev_put(struct net_device *dev) { @@ -3822,6 +3832,7 @@ static inline void dev_put(struct net_device *dev) * @dev: network device * * Hold reference to device to keep it from being freed. + * Try using dev_hold_track() instead. */ static inline void dev_hold(struct net_device *dev) { @@ -3834,6 +3845,40 @@ static inline void dev_hold(struct net_device *dev) } } +static inline void netdev_tracker_alloc(struct net_device *dev, + netdevice_tracker *tracker, gfp_t gfp) +{ +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); +#endif +} + +static inline void netdev_tracker_free(struct net_device *dev, + netdevice_tracker *tracker) +{ +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + ref_tracker_free(&dev->refcnt_tracker, tracker); +#endif +} + +static inline void dev_hold_track(struct net_device *dev, + netdevice_tracker *tracker, gfp_t gfp) +{ + if (dev) { + dev_hold(dev); + netdev_tracker_alloc(dev, tracker, gfp); + } +} + +static inline void dev_put_track(struct net_device *dev, + netdevice_tracker *tracker) +{ + if (dev) { + netdev_tracker_free(dev, tracker); + dev_put(dev); + } +} + /* Carrier loss detection, dial on demand. The functions netif_carrier_on * and _off may be called from IRQ context, but it is caller * who is responsible for serialization of these calls. -- cgit v1.2.3 From 80e8921b2b72c300ca56a01729004d30bedb82cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:21:58 -0800 Subject: net: add net device refcount tracker to struct netdev_rx_queue This helps debugging net device refcount leaks. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 143d60ed0047..3d691fadd569 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -741,6 +741,8 @@ struct netdev_rx_queue { #endif struct kobject kobj; struct net_device *dev; + netdevice_tracker dev_tracker; + #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif -- cgit v1.2.3 From 0b688f24b7d611db3a02f3d4ab562d049c78a17d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:21:59 -0800 Subject: net: add net device refcount tracker to struct netdev_queue This will help debugging pesky netdev reference leaks. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3d691fadd569..b4f704337f65 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -586,6 +586,8 @@ struct netdev_queue { * read-mostly part */ struct net_device *dev; + netdevice_tracker dev_tracker; + struct Qdisc __rcu *qdisc; struct Qdisc *qdisc_sleeping; #ifdef CONFIG_SYSFS -- cgit v1.2.3 From 4dbd24f65c60259ce5d1563433ecaf5fab693c83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:02 -0800 Subject: drop_monitor: add net device refcount tracker We want to track all dev_hold()/dev_put() to ease leak hunting. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 043fcec8b0aa..3276a29f2b81 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -664,13 +664,17 @@ struct devlink_health_reporter_ops { * @trap_name: Trap name. * @trap_group_name: Trap group name. * @input_dev: Input netdevice. + * @dev_tracker: refcount tracker for @input_dev. * @fa_cookie: Flow action user cookie. * @trap_type: Trap type. */ struct devlink_trap_metadata { const char *trap_name; const char *trap_group_name; + struct net_device *input_dev; + netdevice_tracker dev_tracker; + const struct flow_action_cookie *fa_cookie; enum devlink_trap_type trap_type; }; -- cgit v1.2.3 From 9038c320001dd07f60736018edf608ac5baca0ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:03 -0800 Subject: net: dst: add net device refcount tracking to dst_entry We want to track all dev_hold()/dev_put() to ease leak hunting. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 17 +++++++++++++++++ include/net/dst.h | 1 + 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b4f704337f65..afed3b10491b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3883,6 +3883,23 @@ static inline void dev_put_track(struct net_device *dev, } } +static inline void dev_replace_track(struct net_device *odev, + struct net_device *ndev, + netdevice_tracker *tracker, + gfp_t gfp) +{ +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + if (odev) + ref_tracker_free(&odev->refcnt_tracker, tracker); +#endif + dev_hold(ndev); + dev_put(odev); +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + if (ndev) + ref_tracker_alloc(&ndev->refcnt_tracker, tracker, gfp); +#endif +} + /* Carrier loss detection, dial on demand. The functions netif_carrier_on * and _off may be called from IRQ context, but it is caller * who is responsible for serialization of these calls. diff --git a/include/net/dst.h b/include/net/dst.h index a057319aabef..6aa252c3fc55 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -77,6 +77,7 @@ struct dst_entry { #ifndef CONFIG_64BIT atomic_t __refcnt; /* 32-bit offset 64 */ #endif + netdevice_tracker dev_tracker; }; struct dst_metrics { -- cgit v1.2.3 From c0fd407a0666a583a765cfb129c4dc492590ca89 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:05 -0800 Subject: sit: add net device refcount tracking to ip_tunnel Note that other ip_tunnel users do not seem to hold a reference on tunnel->dev. Probably needs some investigations. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index bc3b13ec93c9..0219fe907b26 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -104,7 +104,10 @@ struct metadata_dst; struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; + struct net_device *dev; + netdevice_tracker dev_tracker; + struct net *net; /* netns for packet i/o */ unsigned long err_time; /* Time when the last ICMP error -- cgit v1.2.3 From 56c1c77948ba3576df1c387cefcf3bab93600822 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:06 -0800 Subject: ipv6: add net device refcount tracker to struct ip6_tnl Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/ip6_tunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 028eaea1c854..a38c4f1e4e5c 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -46,6 +46,7 @@ struct __ip6_tnl_parm { struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ + netdevice_tracker dev_tracker; struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ -- cgit v1.2.3 From 85662c9f8cbd4c96088ff99f56bc3d1097d0ac07 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:07 -0800 Subject: net: add net device refcount tracker to struct neighbour Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 55af812c3ed5..190b07fe089e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -158,6 +158,7 @@ struct neighbour { struct list_head managed_list; struct rcu_head rcu; struct net_device *dev; + netdevice_tracker dev_tracker; u8 primary_key[0]; } __randomize_layout; -- cgit v1.2.3 From 77a23b1f954381d7999ce069d3fc8658eb6a9bbc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:08 -0800 Subject: net: add net device refcount tracker to struct pneigh_entry Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 190b07fe089e..5fffb783670a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -174,6 +174,7 @@ struct pneigh_entry { struct pneigh_entry *next; possible_net_t net; struct net_device *dev; + netdevice_tracker dev_tracker; u32 flags; u8 protocol; u8 key[]; -- cgit v1.2.3 From 08d622568e5a58adebc8cb801599d3f181a6b687 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:09 -0800 Subject: net: add net device refcount tracker to struct neigh_parms Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 5fffb783670a..937389e04c8e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -70,6 +70,7 @@ enum { struct neigh_parms { possible_net_t net; struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head list; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; -- cgit v1.2.3 From 8c727003c4d0c776bd286d65c347591734d1d841 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:11 -0800 Subject: ipv6: add net device refcount tracker to struct inet6_dev Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/if_inet6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 653e7d0f65cb..f026cf08a8e8 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -160,6 +160,7 @@ struct ipv6_devstat { struct inet6_dev { struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head addr_list; -- cgit v1.2.3 From c04438f58d140723e58050fcb9d33d84cb39e9e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:12 -0800 Subject: ipv4: add net device refcount tracker to struct in_device Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 518b484a7f07..674aeead6260 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -24,6 +24,8 @@ struct ipv4_devconf { struct in_device { struct net_device *dev; + netdevice_tracker dev_tracker; + refcount_t refcnt; int dead; struct in_ifaddr __rcu *ifa_list;/* IP ifaddr chain */ -- cgit v1.2.3 From 606509f27f67748b92f783a75a6e39cfaa2fe92d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:13 -0800 Subject: net/sched: add net device refcount tracker to struct Qdisc Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 22179b2fda72..dbf202bb1a0e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -125,7 +125,7 @@ struct Qdisc { spinlock_t seqlock; struct rcu_head rcu; - + netdevice_tracker dev_tracker; /* private data */ long privdata[] ____cacheline_aligned; }; -- cgit v1.2.3 From 63f13937cbe9b00982dfc8e578b1aec8e5037333 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:14 -0800 Subject: net: linkwatch: add net device refcount tracker Add a netdevice_tracker inside struct net_device, to track the self reference when a device is in lweventlist. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index afed3b10491b..69dca1edd5a6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1950,6 +1950,7 @@ enum netdev_ml_priv_type { * keep a list of interfaces to be deleted. * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. + * @linkwatch_dev_tracker: refcount tracker used by linkwatch. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2280,6 +2281,7 @@ struct net_device { struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; u8 dev_addr_shadow[MAX_ADDR_LEN]; + netdevice_tracker linkwatch_dev_tracker; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 095e200f175f9843642343a4a48087fcfa4d3751 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:15 -0800 Subject: net: failover: add net device refcount tracker Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/failover.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/failover.h b/include/net/failover.h index bb15438f39c7..f2b42b4b9cd6 100644 --- a/include/net/failover.h +++ b/include/net/failover.h @@ -25,6 +25,7 @@ struct failover_ops { struct failover { struct list_head list; struct net_device __rcu *failover_dev; + netdevice_tracker dev_tracker; struct failover_ops __rcu *ops; }; -- cgit v1.2.3 From 42120a86438379eb77424831ae3d696c2d5cb622 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:16 -0800 Subject: ipmr, ip6mr: add net device refcount tracker to struct vif_device Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/mroute_base.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 8071148f29a6..e05ee9f001ff 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -12,6 +12,7 @@ /** * struct vif_device - interface representor for multicast routing * @dev: network device being used + * @dev_tracker: refcount tracker for @dev reference * @bytes_in: statistic; bytes ingressing * @bytes_out: statistic; bytes egresing * @pkt_in: statistic; packets ingressing @@ -26,6 +27,7 @@ */ struct vif_device { struct net_device *dev; + netdevice_tracker dev_tracker; unsigned long bytes_in, bytes_out; unsigned long pkt_in, pkt_out; unsigned long rate_limit; -- cgit v1.2.3 From 5fa5ae605821e0e10ee489d9a6e331fd287ccc57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Dec 2021 20:22:17 -0800 Subject: netpoll: add net device refcount tracker to struct netpoll Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e6a2d72e0dc7..bd19c4b91e31 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -24,6 +24,7 @@ union inet_addr { struct netpoll { struct net_device *dev; + netdevice_tracker dev_tracker; char dev_name[IFNAMSIZ]; const char *name; -- cgit v1.2.3 From 45cac6754529ae17345d8f5b632d9e602a091a20 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Dec 2021 20:53:56 -0800 Subject: net: fix recent csum changes Vladimir reported csum issues after my recent change in skb_postpull_rcsum() Issue here is the following: initial skb->csum is the csum of [part to be pulled][rest of packet] Old code: skb->csum = csum_sub(skb->csum, csum_partial(pull, pull_length, 0)); New code: skb->csum = ~csum_partial(pull, pull_length, ~skb->csum); This is broken if the csum of [pulled part] happens to be equal to skb->csum, because end result of skb->csum is 0 in new code, instead of being 0xffffffff David Laight suggested to use skb->csum = -csum_partial(pull, pull_length, -skb->csum); I based my patches on existing code present in include/net/seg6.h, update_csum_diff4() and update_csum_diff16() which might need a similar fix. I guess that my tests, mostly pulling 40 bytes of IPv6 header were not providing enough entropy to hit this bug. v2: added wsum_negate() to make sparse happy. Fixes: 29c3002644bd ("net: optimize skb_postpull_rcsum()") Fixes: 0bd28476f636 ("gro: optimize skb_gro_postpull_rcsum()") Signed-off-by: Eric Dumazet Reported-by: Vladimir Oltean Suggested-by: David Laight Cc: David Lebrun Tested-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211204045356.3659278-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 3 ++- include/net/checksum.h | 4 ++++ include/net/gro.h | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index eae4bd3237a4..dd262bd8ddbe 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3486,7 +3486,8 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = ~csum_partial(start, len, ~skb->csum); + skb->csum = wsum_negate(csum_partial(start, len, + wsum_negate(skb->csum))); else if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; diff --git a/include/net/checksum.h b/include/net/checksum.h index 5b96d5bd6e54..5218041e5c8f 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -180,4 +180,8 @@ static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } +static inline __wsum wsum_negate(__wsum val) +{ + return (__force __wsum)-((__force u32)val); +} #endif diff --git a/include/net/gro.h b/include/net/gro.h index b1139fca7c43..8f75802d50fd 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -173,8 +173,8 @@ static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { if (NAPI_GRO_CB(skb)->csum_valid) - NAPI_GRO_CB(skb)->csum = ~csum_partial(start, len, - ~NAPI_GRO_CB(skb)->csum); + NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len, + wsum_negate(NAPI_GRO_CB(skb)->csum))); } /* GRO checksum functions. These are logical equivalents of the normal -- cgit v1.2.3 From f0e6e6fa41b3d2aa1dcb61dd4ed6d7be004bb5a8 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 18 Oct 2021 17:14:07 +0200 Subject: KVM: Drop stale kvm_is_transparent_hugepage() declaration kvm_is_transparent_hugepage() was removed in commit 205d76ff0684 ("KVM: Remove kvm_is_transparent_hugepage() and PageTransCompoundMap()") but its declaration in include/linux/kvm_host.h persisted. Drop it. Fixes: 205d76ff0684 (""KVM: Remove kvm_is_transparent_hugepage() and PageTransCompoundMap()") Signed-off-by: Vitaly Kuznetsov Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211018151407.2107363-1-vkuznets@redhat.com --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c310648cc8f1..6d138adc78af 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1174,7 +1174,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); bool kvm_is_reserved_pfn(kvm_pfn_t pfn); bool kvm_is_zone_device_pfn(kvm_pfn_t pfn); -bool kvm_is_transparent_hugepage(kvm_pfn_t pfn); struct kvm_irq_ack_notifier { struct hlist_node link; -- cgit v1.2.3 From a9e6107616bb8108aa4fc22584a05e69761a91f7 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 1 Dec 2021 13:41:26 +0100 Subject: media: cec: fix a deadlock situation The cec_devnode struct has a lock meant to serialize access to the fields of this struct. This lock is taken during device node (un)registration and when opening or releasing a filehandle to the device node. When the last open filehandle is closed the cec adapter might be disabled by calling the adap_enable driver callback with the devnode.lock held. However, if during that callback a message or event arrives then the driver will call one of the cec_queue_event() variants in cec-adap.c, and those will take the same devnode.lock to walk the open filehandle list. This obviously causes a deadlock. This is quite easy to reproduce with the cec-gpio driver since that uses the cec-pin framework which generated lots of events and uses a kernel thread for the processing, so when adap_enable is called the thread is still running and can generate events. But I suspect that it might also happen with other drivers if an interrupt arrives signaling e.g. a received message before adap_enable had a chance to disable the interrupts. This patch adds a new mutex to serialize access to the fhs list. When adap_enable() is called the devnode.lock mutex is held, but not devnode.lock_fhs. The event functions in cec-adap.c will now use devnode.lock_fhs instead of devnode.lock, ensuring that it is safe to call those functions from the adap_enable callback. This specific issue only happens if the last open filehandle is closed and the physical address is invalid. This is not something that happens during normal operation, but it does happen when monitoring CEC traffic (e.g. cec-ctl --monitor) with an unconfigured CEC adapter. Signed-off-by: Hans Verkuil Cc: # for v5.13 and up Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index 208c9613c07e..77346f757036 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -26,13 +26,17 @@ * @dev: cec device * @cdev: cec character device * @minor: device node minor number + * @lock: lock to serialize open/release and registration * @registered: the device was correctly registered * @unregistered: the device was unregistered + * @lock_fhs: lock to control access to @fhs * @fhs: the list of open filehandles (cec_fh) - * @lock: lock to control access to this structure * * This structure represents a cec-related device node. * + * To add or remove filehandles from @fhs the @lock must be taken first, + * followed by @lock_fhs. It is safe to access @fhs if either lock is held. + * * The @parent is a physical device. It must be set by core or device drivers * before registering the node. */ @@ -43,10 +47,13 @@ struct cec_devnode { /* device info */ int minor; + /* serialize open/release and registration */ + struct mutex lock; bool registered; bool unregistered; + /* protect access to fhs */ + struct mutex lock_fhs; struct list_head fhs; - struct mutex lock; }; struct cec_adapter; -- cgit v1.2.3 From ee1806beff85d4f1a3d7f22aa1bcdc8ffb59b36c Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Wed, 1 Dec 2021 23:56:51 +0100 Subject: media: videobuf2: add WARN_ON_ONCE if bytesused is bigger than buffer length In function vb2_set_plane_payload, report if the given bytesused is bigger than the buffer size, and clamp it to the buffer size. Signed-off-by: Dafna Hirschfeld Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/videobuf2-core.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 2467284e5f26..5468b633b9d2 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -1155,8 +1155,15 @@ static inline void *vb2_get_drv_priv(struct vb2_queue *q) static inline void vb2_set_plane_payload(struct vb2_buffer *vb, unsigned int plane_no, unsigned long size) { - if (plane_no < vb->num_planes) + /* + * size must never be larger than the buffer length, so + * warn and clamp to the buffer length if that's the case. + */ + if (plane_no < vb->num_planes) { + if (WARN_ON_ONCE(size > vb->planes[plane_no].length)) + size = vb->planes[plane_no].length; vb->planes[plane_no].bytesused = size; + } } /** -- cgit v1.2.3 From 77993b595ada5731e513eb06a0f4bf4b9f1e9532 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 29 Nov 2021 18:46:54 +0100 Subject: locking: Allow to include asm/spinlock_types.h from linux/spinlock_types_raw.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The printk header file includes ratelimit_types.h for its __ratelimit() based usage. It is required for the static initializer used in printk_ratelimited(). It uses a raw_spinlock_t and includes the spinlock_types.h. PREEMPT_RT substitutes spinlock_t with a rtmutex based implementation and so its spinlock_t implmentation (provided by spinlock_rt.h) includes rtmutex.h and atomic.h which leads to recursive includes where defines are missing. By including only the raw_spinlock_t defines it avoids the atomic.h related includes at this stage. An example on powerpc: | CALL scripts/atomic/check-atomics.sh |In file included from include/linux/bug.h:5, | from include/linux/page-flags.h:10, | from kernel/bounds.c:10: |arch/powerpc/include/asm/page_32.h: In function ‘clear_page’: |arch/powerpc/include/asm/bug.h:87:4: error: implicit declaration of function â=80=98__WARNâ=80=99 [-Werror=3Dimplicit-function-declaration] | 87 | __WARN(); \ | | ^~~~~~ |arch/powerpc/include/asm/page_32.h:48:2: note: in expansion of macro ‘WARN_ONâ€=99 | 48 | WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1)); | | ^~~~~~~ |arch/powerpc/include/asm/bug.h:58:17: error: invalid application of ‘sizeofâ€=99 to incomplete type ‘struct bug_entryâ€=99 | 58 | "i" (sizeof(struct bug_entry)), \ | | ^~~~~~ |arch/powerpc/include/asm/bug.h:89:3: note: in expansion of macro ‘BUG_ENTRYâ€=99 | 89 | BUG_ENTRY(PPC_TLNEI " %4, 0", \ | | ^~~~~~~~~ |arch/powerpc/include/asm/page_32.h:48:2: note: in expansion of macro ‘WARN_ONâ€=99 | 48 | WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1)); | | ^~~~~~~ |In file included from arch/powerpc/include/asm/ptrace.h:298, | from arch/powerpc/include/asm/hw_irq.h:12, | from arch/powerpc/include/asm/irqflags.h:12, | from include/linux/irqflags.h:16, | from include/asm-generic/cmpxchg-local.h:6, | from arch/powerpc/include/asm/cmpxchg.h:526, | from arch/powerpc/include/asm/atomic.h:11, | from include/linux/atomic.h:7, | from include/linux/rwbase_rt.h:6, | from include/linux/rwlock_types.h:55, | from include/linux/spinlock_types.h:74, | from include/linux/ratelimit_types.h:7, | from include/linux/printk.h:10, | from include/asm-generic/bug.h:22, | from arch/powerpc/include/asm/bug.h:109, | from include/linux/bug.h:5, | from include/linux/page-flags.h:10, | from kernel/bounds.c:10: |include/linux/thread_info.h: In function â=80=98copy_overflowâ=80=99: |include/linux/thread_info.h:210:2: error: implicit declaration of function â=80=98WARNâ=80=99 [-Werror=3Dimplicit-function-declaration] | 210 | WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); | | ^~~~ The WARN / BUG include pulls in printk.h and then ptrace.h expects WARN (from bug.h) which is not yet complete. Even hw_irq.h has WARN_ON() statements. On POWERPC64 there are missing atomic64 defines while building 32bit VDSO: | VDSO32C arch/powerpc/kernel/vdso32/vgettimeofday.o |In file included from include/linux/atomic.h:80, | from include/linux/rwbase_rt.h:6, | from include/linux/rwlock_types.h:55, | from include/linux/spinlock_types.h:74, | from include/linux/ratelimit_types.h:7, | from include/linux/printk.h:10, | from include/linux/kernel.h:19, | from arch/powerpc/include/asm/page.h:11, | from arch/powerpc/include/asm/vdso/gettimeofday.h:5, | from include/vdso/datapage.h:137, | from lib/vdso/gettimeofday.c:5, | from : |include/linux/atomic-arch-fallback.h: In function ‘arch_atomic64_incâ€=99: |include/linux/atomic-arch-fallback.h:1447:2: error: implicit declaration of function ‘arch_atomic64_add’; did you mean ‘arch_atomic_add’? [-Werror=3Dimpl |icit-function-declaration] | 1447 | arch_atomic64_add(1, v); | | ^~~~~~~~~~~~~~~~~ | | arch_atomic_add The generic fallback is not included, atomics itself are not used. If kernel.h does not include printk.h then it comes later from the bug.h include. Allow asm/spinlock_types.h to be included from linux/spinlock_types_raw.h. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211129174654.668506-12-bigeasy@linutronix.de --- include/linux/ratelimit_types.h | 2 +- include/linux/spinlock_types_up.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index b676aa419eef..c21c7f8103e2 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -4,7 +4,7 @@ #include #include -#include +#include #define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) #define DEFAULT_RATELIMIT_BURST 10 diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index c09b6407ae1b..7f86a2016ac5 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_TYPES_UP_H #define __LINUX_SPINLOCK_TYPES_UP_H -#ifndef __LINUX_SPINLOCK_TYPES_H +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H # error "please don't include this file directly" #endif -- cgit v1.2.3 From 8d9f738f16a3ee9f2341578873c542ddd9802fe4 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 7 Dec 2021 20:32:30 +0800 Subject: regulator: fix bullet lists of regulator_ops comment Since 89a6a5e56c82("regulator: add property parsing and callbacks to set protection limits") which introduced a warning: Documentation/driver-api/regulator:166: ./include/linux/regulator/driver.h:96: WARNING: Unexpected indentation. Documentation/driver-api/regulator:166: ./include/linux/regulator/driver.h:98: WARNING: Block quote ends without a blank line; unexpected unindent. Let's fix them. Signed-off-by: Yanteng Si Link: https://lore.kernel.org/r/20211207123230.2262047-1-siyanteng@loongson.cn Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4078c7776453..720684995a77 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -90,15 +90,19 @@ enum regulator_detection_severity { * @set_over_current_protection: Support enabling of and setting limits for over * current situation detection. Detection can be configured for three * levels of severity. - * REGULATOR_SEVERITY_PROT should automatically shut down the regulator(s). - * REGULATOR_SEVERITY_ERR should indicate that over-current situation is - * caused by an unrecoverable error but HW does not perform - * automatic shut down. - * REGULATOR_SEVERITY_WARN should indicate situation where hardware is - * still believed to not be damaged but that a board sepcific - * recovery action is needed. If lim_uA is 0 the limit should not - * be changed but the detection should just be enabled/disabled as - * is requested. + * + * - REGULATOR_SEVERITY_PROT should automatically shut down the regulator(s). + * + * - REGULATOR_SEVERITY_ERR should indicate that over-current situation is + * caused by an unrecoverable error but HW does not perform + * automatic shut down. + * + * - REGULATOR_SEVERITY_WARN should indicate situation where hardware is + * still believed to not be damaged but that a board sepcific + * recovery action is needed. If lim_uA is 0 the limit should not + * be changed but the detection should just be enabled/disabled as + * is requested. + * * @set_over_voltage_protection: Support enabling of and setting limits for over * voltage situation detection. Detection can be configured for same * severities as over current protection. Units of uV. -- cgit v1.2.3 From 500daa0e6be292edcf635ba6b090b89da80e90a8 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 1 Dec 2021 16:32:56 +0900 Subject: dt-bindings: power: Add r8a779f0 SYSC power domain definitions Add power domain indices for R-Car S4-8 (r8a779f0). Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20211201073308.1003945-3-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/power/r8a779f0-sysc.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/dt-bindings/power/r8a779f0-sysc.h (limited to 'include') diff --git a/include/dt-bindings/power/r8a779f0-sysc.h b/include/dt-bindings/power/r8a779f0-sysc.h new file mode 100644 index 000000000000..0ec8ad727ed9 --- /dev/null +++ b/include/dt-bindings/power/r8a779f0-sysc.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (GPL-2.0 or MIT) */ +/* + * Copyright (C) 2021 Renesas Electronics Corp. + */ +#ifndef __DT_BINDINGS_POWER_R8A779F0_SYSC_H__ +#define __DT_BINDINGS_POWER_R8A779F0_SYSC_H__ + +/* + * These power domain indices match the Power Domain Register Numbers (PDR) + */ + +#define R8A779F0_PD_A1E0D0C0 0 +#define R8A779F0_PD_A1E0D0C1 1 +#define R8A779F0_PD_A1E0D1C0 2 +#define R8A779F0_PD_A1E0D1C1 3 +#define R8A779F0_PD_A1E1D0C0 4 +#define R8A779F0_PD_A1E1D0C1 5 +#define R8A779F0_PD_A1E1D1C0 6 +#define R8A779F0_PD_A1E1D1C1 7 +#define R8A779F0_PD_A2E0D0 16 +#define R8A779F0_PD_A2E0D1 17 +#define R8A779F0_PD_A2E1D0 18 +#define R8A779F0_PD_A2E1D1 19 +#define R8A779F0_PD_A3E0 20 +#define R8A779F0_PD_A3E1 21 + +/* Always-on power area */ +#define R8A779F0_PD_ALWAYS_ON 64 + +#endif /* __DT_BINDINGS_POWER_R8A779A0_SYSC_H__*/ -- cgit v1.2.3 From 81c1655823237eaec74b2c175ae8e13adfccdaf7 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 1 Dec 2021 16:32:57 +0900 Subject: dt-bindings: clock: Add r8a779f0 CPG Core Clock Definitions Add all Clock Pulse Generator Core Clock Outputs for the Renesas R-Car S4-8 (R8A779F0) SoC. Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20211201073308.1003945-4-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/r8a779f0-cpg-mssr.h | 64 +++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 include/dt-bindings/clock/r8a779f0-cpg-mssr.h (limited to 'include') diff --git a/include/dt-bindings/clock/r8a779f0-cpg-mssr.h b/include/dt-bindings/clock/r8a779f0-cpg-mssr.h new file mode 100644 index 000000000000..f2ae1c6a82dd --- /dev/null +++ b/include/dt-bindings/clock/r8a779f0-cpg-mssr.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: (GPL-2.0 or MIT) */ +/* + * Copyright (C) 2021 Renesas Electronics Corp. + */ +#ifndef __DT_BINDINGS_CLOCK_R8A779F0_CPG_MSSR_H__ +#define __DT_BINDINGS_CLOCK_R8A779F0_CPG_MSSR_H__ + +#include + +/* r8a779f0 CPG Core Clocks */ + +#define R8A779F0_CLK_ZX 0 +#define R8A779F0_CLK_ZS 1 +#define R8A779F0_CLK_ZT 2 +#define R8A779F0_CLK_ZTR 3 +#define R8A779F0_CLK_S0D2 4 +#define R8A779F0_CLK_S0D3 5 +#define R8A779F0_CLK_S0D4 6 +#define R8A779F0_CLK_S0D2_MM 7 +#define R8A779F0_CLK_S0D3_MM 8 +#define R8A779F0_CLK_S0D4_MM 9 +#define R8A779F0_CLK_S0D2_RT 10 +#define R8A779F0_CLK_S0D3_RT 11 +#define R8A779F0_CLK_S0D4_RT 12 +#define R8A779F0_CLK_S0D6_RT 13 +#define R8A779F0_CLK_S0D3_PER 14 +#define R8A779F0_CLK_S0D6_PER 15 +#define R8A779F0_CLK_S0D12_PER 16 +#define R8A779F0_CLK_S0D24_PER 17 +#define R8A779F0_CLK_S0D2_HSC 18 +#define R8A779F0_CLK_S0D3_HSC 19 +#define R8A779F0_CLK_S0D4_HSC 20 +#define R8A779F0_CLK_S0D6_HSC 21 +#define R8A779F0_CLK_S0D12_HSC 22 +#define R8A779F0_CLK_S0D2_CC 23 +#define R8A779F0_CLK_CL 24 +#define R8A779F0_CLK_CL16M 25 +#define R8A779F0_CLK_CL16M_MM 26 +#define R8A779F0_CLK_CL16M_RT 27 +#define R8A779F0_CLK_CL16M_PER 28 +#define R8A779F0_CLK_CL16M_HSC 29 +#define R8A779F0_CLK_Z0 30 +#define R8A779F0_CLK_Z1 31 +#define R8A779F0_CLK_ZB3 32 +#define R8A779F0_CLK_ZB3D2 33 +#define R8A779F0_CLK_ZB3D4 34 +#define R8A779F0_CLK_SD0H 35 +#define R8A779F0_CLK_SD0 36 +#define R8A779F0_CLK_RPC 37 +#define R8A779F0_CLK_RPCD2 38 +#define R8A779F0_CLK_MSO 39 +#define R8A779F0_CLK_SASYNCRT 40 +#define R8A779F0_CLK_SASYNCPERD1 41 +#define R8A779F0_CLK_SASYNCPERD2 42 +#define R8A779F0_CLK_SASYNCPERD4 43 +#define R8A779F0_CLK_DBGSOC_HSC 44 +#define R8A779F0_CLK_RSW2 45 +#define R8A779F0_CLK_OSC 46 +#define R8A779F0_CLK_ZR 47 +#define R8A779F0_CLK_CPEX 48 +#define R8A779F0_CLK_CBFUSA 49 +#define R8A779F0_CLK_R 50 + +#endif /* __DT_BINDINGS_CLOCK_R8A779F0_CPG_MSSR_H__ */ -- cgit v1.2.3 From 13244cccc2b61ec715f0ac583d3037497004d4a5 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 10:54:52 -0800 Subject: skbuff: introduce skb_pull_data Like skb_pull but returns the original data pointer before pulling the data after performing a check against sbk->len. This allows to change code that does "struct foo *p = (void *)skb->data;" which is hard to audit and error prone, to: p = skb_pull_data(skb, sizeof(*p)); if (!p) return; Which is both safer and cleaner. Acked-by: Jakub Kicinski Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Dan Carpenter Signed-off-by: Marcel Holtmann --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index eba256af64a5..877dda38684a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2373,6 +2373,8 @@ static inline void *skb_pull_inline(struct sk_buff *skb, unsigned int len) return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); } +void *skb_pull_data(struct sk_buff *skb, size_t len); + void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline void *__pskb_pull(struct sk_buff *skb, unsigned int len) -- cgit v1.2.3 From ae61a10d9d46c4a0844c46d0863bf991c4dda66c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 10:54:53 -0800 Subject: Bluetooth: HCI: Use skb_pull_data to parse BR/EDR events This uses skb_pull_data to check the BR/EDR events received have the minimum required length. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0d2a9216869b..ba89b078ceb5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2012,6 +2012,10 @@ struct hci_cp_le_reject_cis { } __packed; /* ---- HCI Events ---- */ +struct hci_ev_status { + __u8 status; +} __packed; + #define HCI_EV_INQUIRY_COMPLETE 0x01 #define HCI_EV_INQUIRY_RESULT 0x02 -- cgit v1.2.3 From aadc3d2f42a5bfcec597bfd0d997e3982f740846 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 10:54:55 -0800 Subject: Bluetooth: HCI: Use skb_pull_data to parse Number of Complete Packets event This uses skb_pull_data to check the Number of Complete Packets events received have the minimum required length. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index ba89b078ceb5..3f57fd677b67 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2139,7 +2139,7 @@ struct hci_comp_pkts_info { } __packed; struct hci_ev_num_comp_pkts { - __u8 num_hndl; + __u8 num; struct hci_comp_pkts_info handles[]; } __packed; -- cgit v1.2.3 From 27d9eb4bcac16446ddbbea8860c11720b7afa891 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 10:54:56 -0800 Subject: Bluetooth: HCI: Use skb_pull_data to parse Inquiry Result event This uses skb_pull_data to check the Inquiry Result events received have the minimum required length. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 3f57fd677b67..55466bfb972a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2028,6 +2028,11 @@ struct inquiry_info { __le16 clock_offset; } __packed; +struct hci_ev_inquiry_result { + __u8 num; + struct inquiry_info info[]; +}; + #define HCI_EV_CONN_COMPLETE 0x03 struct hci_ev_conn_complete { __u8 status; -- cgit v1.2.3 From 8d08d324fdcb7729f22b236a3e20fa37a8a29e43 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 10:54:57 -0800 Subject: Bluetooth: HCI: Use skb_pull_data to parse Inquiry Result with RSSI event This uses skb_pull_data to check the Inquiry Result with RSSI events received have the minimum required length. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 55466bfb972a..d25afd92a46e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2194,7 +2194,7 @@ struct hci_ev_pscan_rep_mode { } __packed; #define HCI_EV_INQUIRY_RESULT_WITH_RSSI 0x22 -struct inquiry_info_with_rssi { +struct inquiry_info_rssi { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; @@ -2202,7 +2202,7 @@ struct inquiry_info_with_rssi { __le16 clock_offset; __s8 rssi; } __packed; -struct inquiry_info_with_rssi_and_pscan_mode { +struct inquiry_info_rssi_pscan { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; @@ -2211,6 +2211,14 @@ struct inquiry_info_with_rssi_and_pscan_mode { __le16 clock_offset; __s8 rssi; } __packed; +struct hci_ev_inquiry_result_rssi { + __u8 num; + struct inquiry_info_rssi info[]; +} __packed; +struct hci_ev_inquiry_result_rssi_pscan { + __u8 num; + struct inquiry_info_rssi_pscan info[]; +} __packed; #define HCI_EV_REMOTE_EXT_FEATURES 0x23 struct hci_ev_remote_ext_features { -- cgit v1.2.3 From 70a6b8de6af5c3e517bfd2ce8dccbd65c3f5bb4f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 10:54:58 -0800 Subject: Bluetooth: HCI: Use skb_pull_data to parse Extended Inquiry Result event This uses skb_pull_data to check the Extended Inquiry Result events received have the minimum required length. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index d25afd92a46e..9e721e6efef3 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2273,6 +2273,11 @@ struct extended_inquiry_info { __u8 data[240]; } __packed; +struct hci_ev_ext_inquiry_result { + __u8 num; + struct extended_inquiry_info info[]; +} __packed; + #define HCI_EV_KEY_REFRESH_COMPLETE 0x30 struct hci_ev_key_refresh_complete { __u8 status; -- cgit v1.2.3 From 47afe93c913a4cd0143667b59ba622086a2acfce Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 10:55:00 -0800 Subject: Bluetooth: HCI: Use skb_pull_data to parse LE Advertising Report event This uses skb_pull_data to check the LE Advertising Report events received have the minimum required length. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 9e721e6efef3..c005b1ccdbc5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2445,13 +2445,18 @@ struct hci_ev_le_conn_complete { #define HCI_EV_LE_ADVERTISING_REPORT 0x02 struct hci_ev_le_advertising_info { - __u8 evt_type; + __u8 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 length; __u8 data[]; } __packed; +struct hci_ev_le_advertising_report { + __u8 num; + struct hci_ev_le_advertising_info info[]; +} __packed; + #define HCI_EV_LE_CONN_UPDATE_COMPLETE 0x03 struct hci_ev_le_conn_update_complete { __u8 status; -- cgit v1.2.3 From b48b833f9e8aa0675820a3b5a0f30d7319590ea8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 10:55:01 -0800 Subject: Bluetooth: HCI: Use skb_pull_data to parse LE Ext Advertising Report event This uses skb_pull_data to check the LE Extended Advertising Report events received have the minimum required length. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c005b1ccdbc5..d3f2da9b2ac2 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2517,8 +2517,8 @@ struct hci_ev_le_phy_update_complete { } __packed; #define HCI_EV_LE_EXT_ADV_REPORT 0x0d -struct hci_ev_le_ext_adv_report { - __le16 evt_type; +struct hci_ev_le_ext_adv_info { + __le16 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 primary_phy; @@ -2526,11 +2526,16 @@ struct hci_ev_le_ext_adv_report { __u8 sid; __u8 tx_power; __s8 rssi; - __le16 interval; - __u8 direct_addr_type; + __le16 interval; + __u8 direct_addr_type; bdaddr_t direct_addr; - __u8 length; - __u8 data[]; + __u8 length; + __u8 data[]; +} __packed; + +struct hci_ev_le_ext_adv_report { + __u8 num; + struct hci_ev_le_ext_adv_info info[]; } __packed; #define HCI_EV_LE_ENHANCED_CONN_COMPLETE 0x0a -- cgit v1.2.3 From a3679649a19179813c3853b8bb397a801d9dd253 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 10:55:02 -0800 Subject: Bluetooth: HCI: Use skb_pull_data to parse LE Direct Advertising Report event This uses skb_pull_data to check the LE Direct Advertising Report events received have the minimum required length. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index d3f2da9b2ac2..4343f79bd02c 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2500,7 +2500,7 @@ struct hci_ev_le_data_len_change { #define HCI_EV_LE_DIRECT_ADV_REPORT 0x0B struct hci_ev_le_direct_adv_info { - __u8 evt_type; + __u8 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 direct_addr_type; @@ -2508,6 +2508,11 @@ struct hci_ev_le_direct_adv_info { __s8 rssi; } __packed; +struct hci_ev_le_direct_adv_report { + __u8 num; + struct hci_ev_le_direct_adv_info info[]; +} __packed; + #define HCI_EV_LE_PHY_UPDATE_COMPLETE 0x0c struct hci_ev_le_phy_update_complete { __u8 status; -- cgit v1.2.3 From d2f8114f9574509580a8506d2ef72e7e43d1a5bd Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Thu, 2 Dec 2021 12:41:59 +0000 Subject: Bluetooth: add quirk disabling LE Read Transmit Power Some devices have a bug causing them to not work if they query LE tx power on startup. Thus we add a quirk in order to not query it and default min/max tx power values to HCI_TX_POWER_INVALID. Signed-off-by: Aditya Garg Reported-by: Orlando Chamberlain Tested-by: Orlando Chamberlain Link: https://lore.kernel.org/r/4970a940-211b-25d6-edab-21a815313954@protonmail.com Fixes: 7c395ea521e6 ("Bluetooth: Query LE tx power on startup") Cc: stable@vger.kernel.org Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 4343f79bd02c..c5f6b82b9d11 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -246,6 +246,15 @@ enum { * HCI after resume. */ HCI_QUIRK_NO_SUSPEND_NOTIFIER, + + /* + * When this quirk is set, LE tx power is not queried on startup + * and the min/max tx power values default to HCI_TX_POWER_INVALID. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, }; /* HCI device flags */ -- cgit v1.2.3 From fe92ee6425a2c79ee84f0b9b8a14117200d15f7d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Dec 2021 11:49:50 -0800 Subject: Bluetooth: hci_core: Rework hci_conn_params flags This reworks hci_conn_params flags to use bitmap_* helpers and add support for setting the supported flags in hdev->conn_flags so it can easily be accessed. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7bae8376fd6f..d1b67755a373 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -152,22 +152,21 @@ struct bdaddr_list_with_irk { u8 local_irk[16]; }; -struct bdaddr_list_with_flags { - struct list_head list; - bdaddr_t bdaddr; - u8 bdaddr_type; - u32 current_flags; -}; - enum hci_conn_flags { HCI_CONN_FLAG_REMOTE_WAKEUP, - HCI_CONN_FLAG_MAX -}; -#define hci_conn_test_flag(nr, flags) ((flags) & (1U << nr)) + __HCI_CONN_NUM_FLAGS, +}; /* Make sure number of flags doesn't exceed sizeof(current_flags) */ -static_assert(HCI_CONN_FLAG_MAX < 32); +static_assert(__HCI_CONN_NUM_FLAGS < 32); + +struct bdaddr_list_with_flags { + struct list_head list; + bdaddr_t bdaddr; + u8 bdaddr_type; + DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); +}; struct bt_uuid { struct list_head list; @@ -560,6 +559,7 @@ struct hci_dev { struct rfkill *rfkill; DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); + DECLARE_BITMAP(conn_flags, __HCI_CONN_NUM_FLAGS); __s8 adv_tx_power; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; @@ -755,7 +755,7 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; - u32 current_flags; + DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); }; extern struct list_head hci_dev_list; -- cgit v1.2.3 From 6126ffabba6b415bd6bf3e1b091ef074e86765c6 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 3 Dec 2021 14:07:22 -0800 Subject: Bluetooth: Introduce HCI_CONN_FLAG_DEVICE_PRIVACY device flag This introduces HCI_CONN_FLAG_DEVICE_PRIVACY which can be used by userspace to indicate to the controller to use Device Privacy Mode to a specific device. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d1b67755a373..cf24af649c7f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -154,6 +154,7 @@ struct bdaddr_list_with_irk { enum hci_conn_flags { HCI_CONN_FLAG_REMOTE_WAKEUP, + HCI_CONN_FLAG_DEVICE_PRIVACY, __HCI_CONN_NUM_FLAGS, }; @@ -1466,6 +1467,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define use_ll_privacy(dev) (ll_privacy_capable(dev) && \ hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY)) +#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ + (hdev->commands[39] & 0x04)) + /* Use enhanced synchronous connection if command is supported */ #define enhanced_sco_capable(dev) ((dev)->commands[29] & 0x08) -- cgit v1.2.3 From 853b70b506a20cddf96419e8ac198df92e51bc4c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 3 Dec 2021 14:07:23 -0800 Subject: Bluetooth: hci_sync: Set Privacy Mode when updating the resolving list This adds support for Set Privacy Mode when updating the resolving list when HCI_CONN_FLAG_DEVICE_PRIVACY so the controller shall use Device Mode for devices programmed in the resolving list, Device Mode is actually required when the remote device are not able to use RPA as otherwise the default mode is Network Privacy Mode in which only allows RPAs thus the controller would filter out advertisement using identity addresses for which there is an IRK. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 10 ++++++++++ include/net/bluetooth/hci_core.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c5f6b82b9d11..7f5f00ff53da 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1940,6 +1940,16 @@ struct hci_rp_le_read_transmit_power { __s8 max_le_tx_power; } __packed; +#define HCI_NETWORK_PRIVACY 0x00 +#define HCI_DEVICE_PRIVACY 0x01 + +#define HCI_OP_LE_SET_PRIVACY_MODE 0x204e +struct hci_cp_le_set_privacy_mode { + __u8 bdaddr_type; + bdaddr_t bdaddr; + __u8 mode; +} __packed; + #define HCI_OP_LE_READ_BUFFER_SIZE_V2 0x2060 struct hci_rp_le_read_buffer_size_v2 { __u8 status; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cf24af649c7f..4d69dcfebd63 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -757,6 +757,7 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); + u8 privacy_mode; }; extern struct list_head hci_dev_list; -- cgit v1.2.3 From 8aca46f91c42020bc58cd56e464a1101e517aa10 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 3 Dec 2021 16:15:40 -0800 Subject: Bluetooth: mgmt: Introduce mgmt_alloc_skb and mgmt_send_event_skb This introduces mgmt_alloc_skb and mgmt_send_event_skb which are convenient when building MGMT events that have variable length as the likes of skb_put_data can be used to insert portion directly on the skb instead of having to first build an intermediate buffer just to be copied over the skb. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 2f31e571f34c..77906832353f 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -390,6 +390,11 @@ struct hci_ctrl { }; }; +struct mgmt_ctrl { + struct hci_dev *hdev; + u16 opcode; +}; + struct bt_skb_cb { u8 pkt_type; u8 force_active; @@ -399,6 +404,7 @@ struct bt_skb_cb { struct l2cap_ctrl l2cap; struct sco_ctrl sco; struct hci_ctrl hci; + struct mgmt_ctrl mgmt; }; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) -- cgit v1.2.3 From 6fadaa565882cd7afc501de5921db6f5e45c784b Mon Sep 17 00:00:00 2001 From: Maxim Galaganov Date: Fri, 3 Dec 2021 14:35:39 -0800 Subject: tcp: expose __tcp_sock_set_cork and __tcp_sock_set_nodelay Expose __tcp_sock_set_cork() and __tcp_sock_set_nodelay() for use in MPTCP setsockopt code -- namely for syncing MPTCP socket options with subflows inside sync_socket_options() while already holding the subflow socket lock. Acked-by: Paolo Abeni Acked-by: Matthieu Baerts Signed-off-by: Maxim Galaganov Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 48d8a363319e..78b91bb92f0d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -512,11 +512,13 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); +void __tcp_sock_set_cork(struct sock *sk, bool on); void tcp_sock_set_cork(struct sock *sk, bool on); int tcp_sock_set_keepcnt(struct sock *sk, int val); int tcp_sock_set_keepidle_locked(struct sock *sk, int val); int tcp_sock_set_keepidle(struct sock *sk, int val); int tcp_sock_set_keepintvl(struct sock *sk, int val); +void __tcp_sock_set_nodelay(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); -- cgit v1.2.3 From 213d56bf33bdda835bac04046f09256a75c5ca8e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:07 +0200 Subject: rcu/nocb: Prepare state machine for a new step Currently SEGCBLIST_SOFTIRQ_ONLY is a bit of an exception among the segcblist flags because it is an exclusive state that doesn't mix up with the other flags. Remove it in favour of: _ A flag specifying that rcu_core() needs to perform callbacks execution and acceleration and _ A flag specifying we want the nocb lock to be held in any needed circumstances This clarifies the code and is more flexible: It allows to have a state where rcu_core() runs with locking while offloading hasn't started yet. This is a necessary step to prepare for triggering rcu_core() at the very beginning of the de-offloading process so that rcu_core() won't dismiss work while being preempted by the de-offloading process, at least not without a pending subsequent rcu_core() that will quickly catch up. Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 3db96c4f45fd..812961b1d064 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -69,7 +69,7 @@ struct rcu_cblist { * * * ---------------------------------------------------------------------------- - * | SEGCBLIST_SOFTIRQ_ONLY | + * | SEGCBLIST_RCU_CORE | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, without holding nocb_lock. | @@ -77,7 +77,7 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | SEGCBLIST_OFFLOADED | + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads, | @@ -89,7 +89,9 @@ struct rcu_cblist { * | | * v v * --------------------------------------- ----------------------------------| - * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_RCU_CORE | | | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_CB | | SEGCBLIST_KTHREAD_GP | * | | | | * | | | | @@ -104,9 +106,10 @@ struct rcu_cblist { * | * v * |--------------------------------------------------------------------------| - * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | - * | SEGCBLIST_KTHREAD_GP | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_GP | | + * | SEGCBLIST_KTHREAD_CB | * | | * | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops | * | handling callbacks. Enable bypass queueing. | @@ -120,7 +123,8 @@ struct rcu_cblist { * * * |--------------------------------------------------------------------------| - * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | @@ -130,6 +134,8 @@ struct rcu_cblist { * | * v * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | @@ -143,7 +149,9 @@ struct rcu_cblist { * | | * v v * ---------------------------------------------------------------------------| - * | | + * | | | + * | SEGCBLIST_RCU_CORE | | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | * | | | * | GP kthread woke up and | CB kthread woke up and | @@ -159,7 +167,7 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | 0 | + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. | @@ -168,17 +176,18 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | SEGCBLIST_SOFTIRQ_ONLY | + * | SEGCBLIST_RCU_CORE | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, without holding nocb_lock. | * ---------------------------------------------------------------------------- */ #define SEGCBLIST_ENABLED BIT(0) -#define SEGCBLIST_SOFTIRQ_ONLY BIT(1) -#define SEGCBLIST_KTHREAD_CB BIT(2) -#define SEGCBLIST_KTHREAD_GP BIT(3) -#define SEGCBLIST_OFFLOADED BIT(4) +#define SEGCBLIST_RCU_CORE BIT(1) +#define SEGCBLIST_LOCKING BIT(2) +#define SEGCBLIST_KTHREAD_CB BIT(3) +#define SEGCBLIST_KTHREAD_GP BIT(4) +#define SEGCBLIST_OFFLOADED BIT(5) struct rcu_segcblist { struct rcu_head *head; -- cgit v1.2.3 From fbb94cbd70d41c7511460896dfc7f9ea5da704b3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:08 +0200 Subject: rcu/nocb: Invoke rcu_core() at the start of deoffloading On PREEMPT_RT, if rcu_core() is preempted by the de-offloading process, some work, such as callbacks acceleration and invocation, may be left unattended due to the volatile checks on the offloaded state. In the worst case this work is postponed until the next rcu_pending() check that can take a jiffy to reach, which can be a problem in case of callbacks flooding. Solve that with invoking rcu_core() early in the de-offloading process. This way any work dismissed by an ongoing rcu_core() call fooled by a preempting deoffloading process will be caught up by a nearby future recall to rcu_core(), this time fully aware of the de-offloading state. Tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 812961b1d064..659d13a7ddaa 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -136,6 +136,20 @@ struct rcu_cblist { * |--------------------------------------------------------------------------| * | SEGCBLIST_RCU_CORE | | * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_CB | | + * | SEGCBLIST_KTHREAD_GP | + * | | + * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | + * | handles callbacks concurrently. Bypass enqueue is enabled. | + * | Invoke RCU core so we make sure not to preempt it in the middle with | + * | leaving some urgent work unattended within a jiffy. | + * ---------------------------------------------------------------------------- + * | + * v + * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | -- cgit v1.2.3 From 802a7dc5cf1bef06f7b290ce76d478138408d6b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Dec 2021 10:03:23 -0800 Subject: netfilter: conntrack: annotate data-races around ct->timeout (struct nf_conn)->timeout can be read/written locklessly, add READ_ONCE()/WRITE_ONCE() to prevent load/store tearing. BUG: KCSAN: data-race in __nf_conntrack_alloc / __nf_conntrack_find_get write to 0xffff888132e78c08 of 4 bytes by task 6029 on cpu 0: __nf_conntrack_alloc+0x158/0x280 net/netfilter/nf_conntrack_core.c:1563 init_conntrack+0x1da/0xb30 net/netfilter/nf_conntrack_core.c:1635 resolve_normal_ct+0x502/0x610 net/netfilter/nf_conntrack_core.c:1746 nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901 ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] nf_hook_slow+0x72/0x170 net/netfilter/core.c:619 nf_hook include/linux/netfilter.h:262 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324 inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135 __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402 tcp_transmit_skb net/ipv4/tcp_output.c:1420 [inline] tcp_write_xmit+0x1450/0x4460 net/ipv4/tcp_output.c:2680 __tcp_push_pending_frames+0x68/0x1c0 net/ipv4/tcp_output.c:2864 tcp_push_pending_frames include/net/tcp.h:1897 [inline] tcp_data_snd_check+0x62/0x2e0 net/ipv4/tcp_input.c:5452 tcp_rcv_established+0x880/0x10e0 net/ipv4/tcp_input.c:5947 tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0xf2/0x270 net/core/sock.c:2768 release_sock+0x40/0x110 net/core/sock.c:3300 sk_stream_wait_memory+0x435/0x700 net/core/stream.c:145 tcp_sendmsg_locked+0xb85/0x25a0 net/ipv4/tcp.c:1402 tcp_sendmsg+0x2c/0x40 net/ipv4/tcp.c:1440 inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:644 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] __sys_sendto+0x21e/0x2c0 net/socket.c:2036 __do_sys_sendto net/socket.c:2048 [inline] __se_sys_sendto net/socket.c:2044 [inline] __x64_sys_sendto+0x74/0x90 net/socket.c:2044 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff888132e78c08 of 4 bytes by task 17446 on cpu 1: nf_ct_is_expired include/net/netfilter/nf_conntrack.h:286 [inline] ____nf_conntrack_find net/netfilter/nf_conntrack_core.c:776 [inline] __nf_conntrack_find_get+0x1c7/0xac0 net/netfilter/nf_conntrack_core.c:807 resolve_normal_ct+0x273/0x610 net/netfilter/nf_conntrack_core.c:1734 nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901 ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] nf_hook_slow+0x72/0x170 net/netfilter/core.c:619 nf_hook include/linux/netfilter.h:262 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324 inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135 __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402 __tcp_send_ack+0x1fd/0x300 net/ipv4/tcp_output.c:3956 tcp_send_ack+0x23/0x30 net/ipv4/tcp_output.c:3962 __tcp_ack_snd_check+0x2d8/0x510 net/ipv4/tcp_input.c:5478 tcp_ack_snd_check net/ipv4/tcp_input.c:5523 [inline] tcp_rcv_established+0x8c2/0x10e0 net/ipv4/tcp_input.c:5948 tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0xf2/0x270 net/core/sock.c:2768 release_sock+0x40/0x110 net/core/sock.c:3300 tcp_sendpage+0x94/0xb0 net/ipv4/tcp.c:1114 inet_sendpage+0x7f/0xc0 net/ipv4/af_inet.c:833 rds_tcp_xmit+0x376/0x5f0 net/rds/tcp_send.c:118 rds_send_xmit+0xbed/0x1500 net/rds/send.c:367 rds_send_worker+0x43/0x200 net/rds/threads.c:200 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 worker_thread+0x616/0xa70 kernel/workqueue.c:2445 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x00027cc2 -> 0x00000000 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 17446 Comm: kworker/u4:5 Tainted: G W 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: krdsd rds_send_worker Note: I chose an arbitrary commit for the Fixes: tag, because I do not think we need to backport this fix to very old kernels. Fixes: e37542ba111f ("netfilter: conntrack: avoid possible false sharing") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cc663c68ddc4..d24b0a34c8f0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -276,14 +276,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; return timeout > 0 ? timeout : 0; } static inline bool nf_ct_is_expired(const struct nf_conn *ct) { - return (__s32)(ct->timeout - nfct_time_stamp) <= 0; + return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; } /* use after obtaining a reference count */ @@ -302,7 +302,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) static inline void nf_ct_offload_timeout(struct nf_conn *ct) { if (nf_ct_expires(ct) < NF_CT_DAY / 2) - ct->timeout = nfct_time_stamp + NF_CT_DAY; + WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); } struct kernel_param; -- cgit v1.2.3 From 81faa4f6fba429334ff72bb5ba7696818509b5b5 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 3 Nov 2021 16:30:28 +0800 Subject: locktorture,rcutorture,torture: Always log error message Unconditionally log messages corresponding to errors. Acked-by: Davidlohr Bueso Signed-off-by: Li Zhijian Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/torture.h b/include/linux/torture.h index 24f58e50a94b..63fa4196e51c 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -38,13 +38,8 @@ do { \ pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); \ } \ } while (0) -#define VERBOSE_TOROUT_ERRSTRING(s) \ -do { \ - if (verbose) { \ - verbose_torout_sleep(); \ - pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); \ - } \ -} while (0) +#define TOROUT_ERRSTRING(s) \ + pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s) void verbose_torout_sleep(void); #define torture_init_error(firsterr) \ -- cgit v1.2.3 From 08f0b22d731fa86957749c649d6ef6ebc07e8ad2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2021 17:30:27 -0800 Subject: net: eql: add net device refcount tracker Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/if_eql.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_eql.h b/include/linux/if_eql.h index d984694c384d..d75601d613cc 100644 --- a/include/linux/if_eql.h +++ b/include/linux/if_eql.h @@ -26,6 +26,7 @@ typedef struct slave { struct list_head list; struct net_device *dev; + netdevice_tracker dev_tracker; long priority; long priority_bps; long priority_Bps; -- cgit v1.2.3 From 19c9ebf6ed70856385296a65e78c1699081b152f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2021 17:30:28 -0800 Subject: vlan: add net device refcount tracker Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/if_vlan.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 41a518336673..8420fe504927 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -162,6 +162,7 @@ struct netpoll; * @vlan_id: VLAN identifier * @flags: device flags * @real_dev: underlying netdevice + * @dev_tracker: refcount tracker for @real_dev reference * @real_dev_addr: address of underlying netdevice * @dent: proc dir entry * @vlan_pcpu_stats: ptr to percpu rx stats @@ -177,6 +178,8 @@ struct vlan_dev_priv { u16 flags; struct net_device *real_dev; + netdevice_tracker dev_tracker; + unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; -- cgit v1.2.3 From f12bf6f3f942b37de65eeea8be25903587fec930 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2021 17:30:30 -0800 Subject: net: watchdog: add net device refcount tracker Add a netdevice_tracker inside struct net_device, to track the self reference when a device has an active watchdog timer. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69dca1edd5a6..1a748ee9a421 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1951,6 +1951,7 @@ enum netdev_ml_priv_type { * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. + * @watchdog_dev_tracker: refcount tracker used by watchdog. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2282,6 +2283,7 @@ struct net_device { u8 dev_addr_shadow[MAX_ADDR_LEN]; netdevice_tracker linkwatch_dev_tracker; + netdevice_tracker watchdog_dev_tracker; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From e44b14ebae1025cff3bef2d78a2e2f6869cefca0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2021 17:30:32 -0800 Subject: inet: add net device refcount tracker to struct fib_nh_common Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3417ba2d27ad..c4297704bbcb 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -79,6 +79,7 @@ struct fnhe_hash_bucket { struct fib_nh_common { struct net_device *nhc_dev; + netdevice_tracker nhc_dev_tracker; int nhc_oif; unsigned char nhc_scope; u8 nhc_family; @@ -111,6 +112,7 @@ struct fib_nh { int nh_saddr_genid; #define fib_nh_family nh_common.nhc_family #define fib_nh_dev nh_common.nhc_dev +#define fib_nh_dev_tracker nh_common.nhc_dev_tracker #define fib_nh_oif nh_common.nhc_oif #define fib_nh_flags nh_common.nhc_flags #define fib_nh_lws nh_common.nhc_lwtstate -- cgit v1.2.3 From 66ce07f7802b68616a008d390f2e6783d68fb79f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2021 17:30:33 -0800 Subject: ax25: add net device refcount tracker Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/ax25.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/ax25.h b/include/net/ax25.h index 03d409de61ad..526e49589197 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -229,7 +229,10 @@ struct ctl_table; typedef struct ax25_dev { struct ax25_dev *next; + struct net_device *dev; + netdevice_tracker dev_tracker; + struct net_device *forward; struct ctl_table_header *sysheader; int values[AX25_MAX_VALUES]; -- cgit v1.2.3 From 615d069dcf1207462ce30c05a2f47d461be8f6c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2021 17:30:34 -0800 Subject: llc: add net device refcount tracker Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/llc_conn.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index ea985aa7a6c5..2c1ea3414640 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -38,6 +38,7 @@ struct llc_sock { struct llc_addr laddr; /* lsap/mac pair */ struct llc_addr daddr; /* dsap/mac pair */ struct net_device *dev; /* device to send to remote */ + netdevice_tracker dev_tracker; u32 copied_seq; /* head of yet unread data */ u8 retry_count; /* number of retries */ u8 ack_must_be_send; -- cgit v1.2.3 From ada066b2e02cad7934e86e51985078d707c64250 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2021 17:30:39 -0800 Subject: net: sched: act_mirred: add net device refcount tracker Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_mirred.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 1cace4c69e44..32ce8ea36950 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -10,6 +10,7 @@ struct tcf_mirred { int tcfm_eaction; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; + netdevice_tracker tcfm_dev_tracker; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) -- cgit v1.2.3 From a97770cc4016c2733bcef9dbe3d5b1ad02d13356 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Mon, 6 Dec 2021 16:12:27 +0800 Subject: net: phy: Remove unnecessary indentation in the comments of phy_device Fix warning as: linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:543: WARNING: Unexpected indentation. linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:544: WARNING: Block quote ends without a blank line; unexpected unindent. linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:546: WARNING: Unexpected indentation. Suggested-by: Akira Yokosawa Signed-off-by: Yanteng Si Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 96e43fbb2dd8..cbf03a5f9cf5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -538,11 +538,12 @@ struct macsec_ops; * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. - * Bits [15:0] are free to use by the PHY driver to communicate - * driver specific behavior. - * Bits [23:16] are currently reserved for future use. - * Bits [31:24] are reserved for defining generic - * PHY driver behavior. + * + * - Bits [15:0] are free to use by the PHY driver to communicate + * driver specific behavior. + * - Bits [23:16] are currently reserved for future use. + * - Bits [31:24] are reserved for defining generic + * PHY driver behavior. * @irq: IRQ number of the PHY's interrupt (-1 if none) * @phy_timer: The timer for handling the state machine * @phylink: Pointer to phylink instance for this PHY -- cgit v1.2.3 From 32ecd22ba60bbb724c6631d763ce77e5139bd341 Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Tue, 7 Dec 2021 09:00:30 -0800 Subject: net: mscc: ocelot: split register definitions to a separate file Move these to a separate file will allow them to be shared to other drivers. Signed-off-by: Colin Foster Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/soc/mscc/vsc7514_regs.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/soc/mscc/vsc7514_regs.h (limited to 'include') diff --git a/include/soc/mscc/vsc7514_regs.h b/include/soc/mscc/vsc7514_regs.h new file mode 100644 index 000000000000..98743e252012 --- /dev/null +++ b/include/soc/mscc/vsc7514_regs.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Microsemi Ocelot Switch driver + * + * Copyright (c) 2021 Innovative Advantage Inc. + */ + +#ifndef VSC7514_REGS_H +#define VSC7514_REGS_H + +extern const u32 vsc7514_ana_regmap[]; +extern const u32 vsc7514_qs_regmap[]; +extern const u32 vsc7514_qsys_regmap[]; +extern const u32 vsc7514_rew_regmap[]; +extern const u32 vsc7514_sys_regmap[]; +extern const u32 vsc7514_vcap_regmap[]; +extern const u32 vsc7514_ptp_regmap[]; +extern const u32 vsc7514_dev_gmii_regmap[]; + +extern const struct vcap_field vsc7514_vcap_es0_keys[]; +extern const struct vcap_field vsc7514_vcap_es0_actions[]; +extern const struct vcap_field vsc7514_vcap_is1_keys[]; +extern const struct vcap_field vsc7514_vcap_is1_actions[]; +extern const struct vcap_field vsc7514_vcap_is2_keys[]; +extern const struct vcap_field vsc7514_vcap_is2_actions[]; + +#endif -- cgit v1.2.3 From 330c6d3bfa268794bf692165d0f781f1c2d4d83e Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 24 Nov 2021 10:45:36 +0900 Subject: can: bittiming: replace CAN units with the generic ones from linux/units.h In [1], we introduced a set of units in linux/can/bittiming.h. Since then, generic SI prefixes were added to linux/units.h in [2]. Those new prefixes can perfectly replace CAN specific ones. This patch replaces all occurrences of the CAN units with their corresponding prefix (from linux/units) and the unit (as a comment) according to below table. CAN units SI metric prefix (from linux/units) + unit (as a comment) ------------------------------------------------------------------------ CAN_KBPS KILO /* BPS */ CAN_MBPS MEGA /* BPS */ CAM_MHZ MEGA /* Hz */ The definition are then removed from linux/can/bittiming.h [1] commit 1d7750760b70 ("can: bittiming: add CAN_KBPS, CAN_MBPS and CAN_MHZ macros") [2] commit 26471d4a6cf8 ("units: Add SI metric prefix definitions") Link: https://lore.kernel.org/all/20211124014536.782550-1-mailhol.vincent@wanadoo.fr Suggested-by: Jimmy Assarsson Suggested-by: Oliver Hartkopp Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 20b50baf3a02..a81652d1c6f3 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -12,13 +12,6 @@ #define CAN_SYNC_SEG 1 -/* Kilobits and Megabits per second */ -#define CAN_KBPS 1000UL -#define CAN_MBPS 1000000UL - -/* Megahertz */ -#define CAN_MHZ 1000000UL - #define CAN_CTRLMODE_TDC_MASK \ (CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL) -- cgit v1.2.3 From 27592ae8dbe41033261b6fdf27d78998aabd2665 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Nov 2021 16:03:57 +0000 Subject: KVM: Move wiping of the kvm->vcpus array to common code All architectures have similar loops iterating over the vcpus, freeing one vcpu at a time, and eventually wiping the reference off the vcpus array. They are also inconsistently taking the kvm->lock mutex when wiping the references from the array. Make this code common, which will simplify further changes. The locking is dropped altogether, as this should only be called when there is no further references on the kvm structure. Reviewed-by: Claudio Imbrenda Signed-off-by: Marc Zyngier Message-Id: <20211116160403.4074052-2-maz@kernel.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c310648cc8f1..e2f9f8f67c58 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -733,7 +733,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) if (WARN_ON_ONCE(!memslot->npages)) { \ } else -void kvm_vcpu_destroy(struct kvm_vcpu *vcpu); +void kvm_destroy_vcpus(struct kvm *kvm); void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From c5b077549136584618a66258f09d8d4b41e7409c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Nov 2021 16:04:01 +0000 Subject: KVM: Convert the kvm->vcpus array to a xarray At least on arm64 and x86, the vcpus array is pretty huge (up to 1024 entries on x86) and is mostly empty in the majority of the cases (running 1k vcpu VMs is not that common). This mean that we end-up with a 4kB block of unused memory in the middle of the kvm structure. Instead of wasting away this memory, let's use an xarray instead, which gives us almost the same flexibility as a normal array, but with a reduced memory usage with smaller VMs. Signed-off-by: Marc Zyngier Message-Id: <20211116160403.4074052-6-maz@kernel.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e2f9f8f67c58..2201dc07126a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -552,7 +553,7 @@ struct kvm { struct mutex slots_arch_lock; struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; - struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + struct xarray vcpu_array; /* Used to wait for completion of MMU notifiers. */ spinlock_t mn_invalidate_lock; @@ -701,7 +702,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ smp_rmb(); - return kvm->vcpus[i]; + return xa_load(&kvm->vcpu_array, i); } #define kvm_for_each_vcpu(idx, vcpup, kvm) \ -- cgit v1.2.3 From 46808a4cb89708c2e5b264eb9d1035762581921b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Nov 2021 16:04:02 +0000 Subject: KVM: Use 'unsigned long' as kvm_for_each_vcpu()'s index Everywhere we use kvm_for_each_vpcu(), we use an int as the vcpu index. Unfortunately, we're about to move rework the iterator, which requires this to be upgrade to an unsigned long. Let's bite the bullet and repaint all of it in one go. Signed-off-by: Marc Zyngier Message-Id: <20211116160403.4074052-7-maz@kernel.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2201dc07126a..7da6086262c6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -714,7 +714,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { struct kvm_vcpu *vcpu = NULL; - int i; + unsigned long i; if (id < 0) return NULL; -- cgit v1.2.3 From 214bd3a6f46981b7867946e1b4f628a06bcf2091 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Nov 2021 16:04:03 +0000 Subject: KVM: Convert kvm_for_each_vcpu() to using xa_for_each_range() Now that the vcpu array is backed by an xarray, use the optimised iterator that matches the underlying data structure. Suggested-by: Sean Christopherson Signed-off-by: Marc Zyngier Message-Id: <20211116160403.4074052-8-maz@kernel.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7da6086262c6..66548287ed42 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -705,11 +705,9 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) return xa_load(&kvm->vcpu_array, i); } -#define kvm_for_each_vcpu(idx, vcpup, kvm) \ - for (idx = 0; \ - idx < atomic_read(&kvm->online_vcpus) && \ - (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ - idx++) +#define kvm_for_each_vcpu(idx, vcpup, kvm) \ + xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ + (atomic_read(&kvm->online_vcpus) - 1)) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { -- cgit v1.2.3 From afa319a54a8c760ba59683cd3c4318635049a664 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 6 Dec 2021 20:54:07 +0100 Subject: KVM: Require total number of memslot pages to fit in an unsigned long Explicitly disallow creating more memslot pages than can fit in an unsigned long, KVM doesn't correctly handle a total number of memslot pages that doesn't fit in an unsigned long and remedying that would be a waste of time. For a 64-bit kernel, this is a nop as memslots are not allowed to overlap in the gfn address space. With a 32-bit kernel, userspace can at most address 3gb of virtual memory, whereas wrapping the total number of pages would require 4tb+ of guest physical memory. Even with x86's second address space for SMM, userspace would need to alias all of guest memory more than one _thousand_ times. And on older x86 hardware with MAXPHYADDR < 43, the guest couldn't actually access any of those aliases even if userspace lied about guest.MAXPHYADDR. On 390 and arm64, this is a nop as they don't support 32-bit hosts. On x86, practically speaking this is simply acknowledging reality as the existing kvm_mmu_calculate_default_mmu_pages() assumes the total number of pages fits in an "unsigned long". On PPC, this is likely a nop as every flavor of PPC KVM assumes gfns (and gpas!) fit in unsigned long. arch/powerpc/kvm/book3s_32_mmu_host.c goes a step further and fails the build if CONFIG_PTE_64BIT=y, which presumably means that it does't support 64-bit physical addresses. On MIPS, this is also likely a nop as the core MMU helpers assume gpas fit in unsigned long, e.g. see kvm_mips_##name##_pte. And finally, RISC-V is a "don't care" as it doesn't exist in any release, i.e. there is no established ABI to break. Signed-off-by: Sean Christopherson Reviewed-by: Maciej S. Szmigiero Signed-off-by: Maciej S. Szmigiero Message-Id: <1c2c91baf8e78acccd4dad38da591002e61c013c.1638817638.git.maciej.szmigiero@oracle.com> --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 66548287ed42..e38705359af5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -552,6 +552,7 @@ struct kvm { */ struct mutex slots_arch_lock; struct mm_struct *mm; /* userspace tied to this vm */ + unsigned long nr_memslot_pages; struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; struct xarray vcpu_array; -- cgit v1.2.3 From 537a17b3149300987456e8949ccb991e604047d6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 6 Dec 2021 20:54:11 +0100 Subject: KVM: Let/force architectures to deal with arch specific memslot data Pass the "old" slot to kvm_arch_prepare_memory_region() and force arch code to handle propagating arch specific data from "new" to "old" when necessary. This is a baby step towards dynamically allocating "new" from the get go, and is a (very) minor performance boost on x86 due to not unnecessarily copying arch data. For PPC HV, copy the rmap in the !CREATE and !DELETE paths, i.e. for MOVE and FLAGS_ONLY. This is functionally a nop as the previous behavior would overwrite the pointer for CREATE, and eventually discard/ignore it for DELETE. For x86, copy the arch data only for FLAGS_ONLY changes. Unlike PPC HV, x86 needs to reallocate arch data in the MOVE case as the size of x86's allocations depend on the alignment of the memslot's gfn. Opportunistically tweak kvm_arch_prepare_memory_region()'s param order to match the "commit" prototype. Signed-off-by: Sean Christopherson Reviewed-by: Maciej S. Szmigiero [mss: add missing RISCV kvm_arch_prepare_memory_region() change] Signed-off-by: Maciej S. Szmigiero Message-Id: <67dea5f11bbcfd71e3da5986f11e87f5dd4013f9.1638817639.git.maciej.szmigiero@oracle.com> --- include/linux/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e38705359af5..cb7311dc6f32 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -833,8 +833,9 @@ int __kvm_set_memory_region(struct kvm *kvm, void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + struct kvm_memory_slot *new, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, -- cgit v1.2.3 From 6a99c6e3f52a6f0d4c6ebcfa7359c718a19ffbe6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 6 Dec 2021 20:54:18 +0100 Subject: KVM: Stop passing kvm_userspace_memory_region to arch memslot hooks Drop the @mem param from kvm_arch_{prepare,commit}_memory_region() now that its use has been removed in all architectures. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Maciej S. Szmigiero Signed-off-by: Maciej S. Szmigiero Message-Id: --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cb7311dc6f32..da0d4f21a150 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -833,12 +833,10 @@ int __kvm_set_memory_region(struct kvm *kvm, void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, struct kvm_memory_slot *new, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem, struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change); -- cgit v1.2.3 From c928bfc2632fa3dd6a3bd4504ac6d8e42302287a Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 6 Dec 2021 20:54:25 +0100 Subject: KVM: Integrate gfn_to_memslot_approx() into search_memslots() s390 arch has gfn_to_memslot_approx() which is almost identical to search_memslots(), differing only in that in case the gfn falls in a hole one of the memslots bordering the hole is returned. Add this lookup mode as an option to search_memslots() so we don't have two almost identical functions for looking up a memslot by its gfn. Signed-off-by: Maciej S. Szmigiero [sean: tweaked helper names to keep gfn_to_memslot_approx() in s390] Reviewed-by: Sean Christopherson Message-Id: <171cd89b52c718dbe180ecd909b4437a64a7e2ec.1638817640.git.maciej.szmigiero@oracle.com> --- include/linux/kvm_host.h | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index da0d4f21a150..2f80ce84fbcf 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1233,10 +1233,14 @@ try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn) * Returns a pointer to the memslot that contains gfn and records the index of * the slot in index. Otherwise returns NULL. * + * With "approx" set returns the memslot also when the address falls + * in a hole. In that case one of the memslots bordering the hole is + * returned. + * * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! */ static inline struct kvm_memory_slot * -search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index) +search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index, bool approx) { int start = 0, end = slots->used_slots; struct kvm_memory_slot *memslots = slots->memslots; @@ -1254,22 +1258,26 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index) start = slot + 1; } + if (approx && start >= slots->used_slots) { + *index = slots->used_slots - 1; + return &memslots[slots->used_slots - 1]; + } + slot = try_get_memslot(slots, start, gfn); if (slot) { *index = start; return slot; } + if (approx) { + *index = start; + return &memslots[start]; + } return NULL; } -/* - * __gfn_to_memslot() and its descendants are here because it is called from - * non-modular code in arch/powerpc/kvm/book3s_64_vio{,_hv}.c. gfn_to_memslot() - * itself isn't here as an inline because that would bloat other code too much. - */ static inline struct kvm_memory_slot * -__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) +____gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn, bool approx) { struct kvm_memory_slot *slot; int slot_index = atomic_read(&slots->last_used_slot); @@ -1278,7 +1286,7 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) if (slot) return slot; - slot = search_memslots(slots, gfn, &slot_index); + slot = search_memslots(slots, gfn, &slot_index, approx); if (slot) { atomic_set(&slots->last_used_slot, slot_index); return slot; @@ -1287,6 +1295,17 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) return NULL; } +/* + * __gfn_to_memslot() and its descendants are here to allow arch code to inline + * the lookups in hot paths. gfn_to_memslot() itself isn't here as an inline + * because that would bloat other code too much. + */ +static inline struct kvm_memory_slot * +__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) +{ + return ____gfn_to_memslot(slots, gfn, false); +} + static inline unsigned long __gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) { -- cgit v1.2.3 From 26b8345abc75a7404716864710930407b7d873f9 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 6 Dec 2021 20:54:27 +0100 Subject: KVM: Resolve memslot ID via a hash table instead of via a static array Memslot ID to the corresponding memslot mappings are currently kept as indices in static id_to_index array. The size of this array depends on the maximum allowed memslot count (regardless of the number of memslots actually in use). This has become especially problematic recently, when memslot count cap was removed, so the maximum count is now full 32k memslots - the maximum allowed by the current KVM API. Keeping these IDs in a hash table (instead of an array) avoids this problem. Resolving a memslot ID to the actual memslot (instead of its index) will also enable transitioning away from an array-based implementation of the whole memslots structure in a later commit. Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Maciej S. Szmigiero Message-Id: <117fb2c04320e6cd6cf34f205a72eadb0aa8d5f9.1638817640.git.maciej.szmigiero@oracle.com> --- include/linux/kvm_host.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2f80ce84fbcf..79db70a8323e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -426,6 +427,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) struct kvm_memory_slot { + struct hlist_node id_node; gfn_t base_gfn; unsigned long npages; unsigned long *dirty_bitmap; @@ -527,8 +529,15 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) */ struct kvm_memslots { u64 generation; - /* The mapping table from slot id to the index in memslots[]. */ - short id_to_index[KVM_MEM_SLOTS_NUM]; + /* + * The mapping table from slot id to the index in memslots[]. + * + * 7-bit bucket count matches the size of the old id to index array for + * 512 slots, while giving good performance with this slot count. + * Higher bucket counts bring only small performance improvements but + * always result in higher memory usage (even for lower memslot counts). + */ + DECLARE_HASHTABLE(id_hash, 7); atomic_t last_used_slot; int used_slots; struct kvm_memory_slot memslots[]; @@ -796,16 +805,14 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) static inline struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id) { - int index = slots->id_to_index[id]; struct kvm_memory_slot *slot; - if (index < 0) - return NULL; - - slot = &slots->memslots[index]; + hash_for_each_possible(slots->id_hash, slot, id_node, id) { + if (slot->id == id) + return slot; + } - WARN_ON(slot->id != id); - return slot; + return NULL; } /* -- cgit v1.2.3 From ed922739c9199bf515a3e7fec3e319ce1edeef2a Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 6 Dec 2021 20:54:28 +0100 Subject: KVM: Use interval tree to do fast hva lookup in memslots The current memslots implementation only allows quick binary search by gfn, quick lookup by hva is not possible - the implementation has to do a linear scan of the whole memslots array, even though the operation being performed might apply just to a single memslot. This significantly hurts performance of per-hva operations with higher memslot counts. Since hva ranges can overlap between memslots an interval tree is needed for tracking them. [sean: handle interval tree updates in kvm_replace_memslot()] Signed-off-by: Maciej S. Szmigiero Message-Id: --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 79db70a8323e..9552ad6d6652 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -428,6 +429,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) struct kvm_memory_slot { struct hlist_node id_node; + struct interval_tree_node hva_node; gfn_t base_gfn; unsigned long npages; unsigned long *dirty_bitmap; @@ -529,6 +531,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) */ struct kvm_memslots { u64 generation; + struct rb_root_cached hva_tree; /* * The mapping table from slot id to the index in memslots[]. * -- cgit v1.2.3 From a54d806688fe1e482350ce759a8a0fc9ebf814b0 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 6 Dec 2021 20:54:30 +0100 Subject: KVM: Keep memslots in tree-based structures instead of array-based ones The current memslot code uses a (reverse gfn-ordered) memslot array for keeping track of them. Because the memslot array that is currently in use cannot be modified every memslot management operation (create, delete, move, change flags) has to make a copy of the whole array so it has a scratch copy to work on. Strictly speaking, however, it is only necessary to make copy of the memslot that is being modified, copying all the memslots currently present is just a limitation of the array-based memslot implementation. Two memslot sets, however, are still needed so the VM continues to run on the currently active set while the requested operation is being performed on the second, currently inactive one. In order to have two memslot sets, but only one copy of actual memslots it is necessary to split out the memslot data from the memslot sets. The memslots themselves should be also kept independent of each other so they can be individually added or deleted. These two memslot sets should normally point to the same set of memslots. They can, however, be desynchronized when performing a memslot management operation by replacing the memslot to be modified by its copy. After the operation is complete, both memslot sets once again point to the same, common set of memslot data. This commit implements the aforementioned idea. For tracking of gfns an ordinary rbtree is used since memslots cannot overlap in the guest address space and so this data structure is sufficient for ensuring that lookups are done quickly. The "last used slot" mini-caches (both per-slot set one and per-vCPU one), that keep track of the last found-by-gfn memslot, are still present in the new code. Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Maciej S. Szmigiero Message-Id: <17c0cf3663b760a0d3753d4ac08c0753e941b811.1638817641.git.maciej.szmigiero@oracle.com> --- include/linux/kvm_host.h | 143 +++++++++++++++++++++++------------------------ 1 file changed, 71 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9552ad6d6652..9eda8a63feae 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -358,11 +359,13 @@ struct kvm_vcpu { struct kvm_dirty_ring dirty_ring; /* - * The index of the most recently used memslot by this vCPU. It's ok - * if this becomes stale due to memslot changes since we always check - * it is a valid slot. + * The most recently used memslot by this vCPU and the slots generation + * for which it is valid. + * No wraparound protection is needed since generations won't overflow in + * thousands of years, even assuming 1M memslot operations per second. */ - int last_used_slot; + struct kvm_memory_slot *last_used_slot; + u64 last_used_slot_gen; }; /* must be called with irqs disabled */ @@ -427,9 +430,26 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) */ #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) +/* + * Since at idle each memslot belongs to two memslot sets it has to contain + * two embedded nodes for each data structure that it forms a part of. + * + * Two memslot sets (one active and one inactive) are necessary so the VM + * continues to run on one memslot set while the other is being modified. + * + * These two memslot sets normally point to the same set of memslots. + * They can, however, be desynchronized when performing a memslot management + * operation by replacing the memslot to be modified by its copy. + * After the operation is complete, both memslot sets once again point to + * the same, common set of memslot data. + * + * The memslots themselves are independent of each other so they can be + * individually added or deleted. + */ struct kvm_memory_slot { - struct hlist_node id_node; - struct interval_tree_node hva_node; + struct hlist_node id_node[2]; + struct interval_tree_node hva_node[2]; + struct rb_node gfn_node[2]; gfn_t base_gfn; unsigned long npages; unsigned long *dirty_bitmap; @@ -524,16 +544,13 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) } #endif -/* - * Note: - * memslots are not sorted by id anymore, please use id_to_memslot() - * to get the memslot by its id. - */ struct kvm_memslots { u64 generation; + atomic_long_t last_used_slot; struct rb_root_cached hva_tree; + struct rb_root gfn_tree; /* - * The mapping table from slot id to the index in memslots[]. + * The mapping table from slot id to memslot. * * 7-bit bucket count matches the size of the old id to index array for * 512 slots, while giving good performance with this slot count. @@ -541,9 +558,7 @@ struct kvm_memslots { * always result in higher memory usage (even for lower memslot counts). */ DECLARE_HASHTABLE(id_hash, 7); - atomic_t last_used_slot; - int used_slots; - struct kvm_memory_slot memslots[]; + int node_idx; }; struct kvm { @@ -565,6 +580,9 @@ struct kvm { struct mutex slots_arch_lock; struct mm_struct *mm; /* userspace tied to this vm */ unsigned long nr_memslot_pages; + /* The two memslot sets - active and inactive (per address space) */ + struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2]; + /* The current active memslot set for each address space */ struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; struct xarray vcpu_array; @@ -739,11 +757,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) return NULL; } -#define kvm_for_each_memslot(memslot, slots) \ - for (memslot = &slots->memslots[0]; \ - memslot < slots->memslots + slots->used_slots; memslot++) \ - if (WARN_ON_ONCE(!memslot->npages)) { \ - } else +static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) +{ + return vcpu->vcpu_idx; +} void kvm_destroy_vcpus(struct kvm *kvm); @@ -805,12 +822,23 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) return __kvm_memslots(vcpu->kvm, as_id); } +static inline bool kvm_memslots_empty(struct kvm_memslots *slots) +{ + return RB_EMPTY_ROOT(&slots->gfn_tree); +} + +#define kvm_for_each_memslot(memslot, bkt, slots) \ + hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \ + if (WARN_ON_ONCE(!memslot->npages)) { \ + } else + static inline struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id) { struct kvm_memory_slot *slot; + int idx = slots->node_idx; - hash_for_each_possible(slots->id_hash, slot, id_node, id) { + hash_for_each_possible(slots->id_hash, slot, id_node[idx], id) { if (slot->id == id) return slot; } @@ -1214,25 +1242,15 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); /* - * Returns a pointer to the memslot at slot_index if it contains gfn. + * Returns a pointer to the memslot if it contains gfn. * Otherwise returns NULL. */ static inline struct kvm_memory_slot * -try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn) +try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { - struct kvm_memory_slot *slot; - - if (slot_index < 0 || slot_index >= slots->used_slots) + if (!slot) return NULL; - /* - * slot_index can come from vcpu->last_used_slot which is not kept - * in sync with userspace-controllable memslot deletion. So use nospec - * to prevent the CPU from speculating past the end of memslots[]. - */ - slot_index = array_index_nospec(slot_index, slots->used_slots); - slot = &slots->memslots[slot_index]; - if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages) return slot; else @@ -1240,65 +1258,46 @@ try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn) } /* - * Returns a pointer to the memslot that contains gfn and records the index of - * the slot in index. Otherwise returns NULL. + * Returns a pointer to the memslot that contains gfn. Otherwise returns NULL. * * With "approx" set returns the memslot also when the address falls * in a hole. In that case one of the memslots bordering the hole is * returned. - * - * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! */ static inline struct kvm_memory_slot * -search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index, bool approx) +search_memslots(struct kvm_memslots *slots, gfn_t gfn, bool approx) { - int start = 0, end = slots->used_slots; - struct kvm_memory_slot *memslots = slots->memslots; struct kvm_memory_slot *slot; - - if (unlikely(!slots->used_slots)) - return NULL; - - while (start < end) { - int slot = start + (end - start) / 2; - - if (gfn >= memslots[slot].base_gfn) - end = slot; - else - start = slot + 1; - } - - if (approx && start >= slots->used_slots) { - *index = slots->used_slots - 1; - return &memslots[slots->used_slots - 1]; - } - - slot = try_get_memslot(slots, start, gfn); - if (slot) { - *index = start; - return slot; - } - if (approx) { - *index = start; - return &memslots[start]; + struct rb_node *node; + int idx = slots->node_idx; + + slot = NULL; + for (node = slots->gfn_tree.rb_node; node; ) { + slot = container_of(node, struct kvm_memory_slot, gfn_node[idx]); + if (gfn >= slot->base_gfn) { + if (gfn < slot->base_gfn + slot->npages) + return slot; + node = node->rb_right; + } else + node = node->rb_left; } - return NULL; + return approx ? slot : NULL; } static inline struct kvm_memory_slot * ____gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn, bool approx) { struct kvm_memory_slot *slot; - int slot_index = atomic_read(&slots->last_used_slot); - slot = try_get_memslot(slots, slot_index, gfn); + slot = (struct kvm_memory_slot *)atomic_long_read(&slots->last_used_slot); + slot = try_get_memslot(slot, gfn); if (slot) return slot; - slot = search_memslots(slots, gfn, &slot_index, approx); + slot = search_memslots(slots, gfn, approx); if (slot) { - atomic_set(&slots->last_used_slot, slot_index); + atomic_long_set(&slots->last_used_slot, (unsigned long)slot); return slot; } -- cgit v1.2.3 From f4209439b522432d140d33393d4a3f12e695527b Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 6 Dec 2021 20:54:32 +0100 Subject: KVM: Optimize gfn lookup in kvm_zap_gfn_range() Introduce a memslots gfn upper bound operation and use it to optimize kvm_zap_gfn_range(). This way this handler can do a quick lookup for intersecting gfns and won't have to do a linear scan of the whole memslot set. Signed-off-by: Maciej S. Szmigiero Message-Id: --- include/linux/kvm_host.h | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9eda8a63feae..3bc98497e796 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -846,6 +846,100 @@ struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id) return NULL; } +/* Iterator used for walking memslots that overlap a gfn range. */ +struct kvm_memslot_iter { + struct kvm_memslots *slots; + struct rb_node *node; + struct kvm_memory_slot *slot; +}; + +static inline void kvm_memslot_iter_next(struct kvm_memslot_iter *iter) +{ + iter->node = rb_next(iter->node); + if (!iter->node) + return; + + iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[iter->slots->node_idx]); +} + +static inline void kvm_memslot_iter_start(struct kvm_memslot_iter *iter, + struct kvm_memslots *slots, + gfn_t start) +{ + int idx = slots->node_idx; + struct rb_node *tmp; + struct kvm_memory_slot *slot; + + iter->slots = slots; + + /* + * Find the so called "upper bound" of a key - the first node that has + * its key strictly greater than the searched one (the start gfn in our case). + */ + iter->node = NULL; + for (tmp = slots->gfn_tree.rb_node; tmp; ) { + slot = container_of(tmp, struct kvm_memory_slot, gfn_node[idx]); + if (start < slot->base_gfn) { + iter->node = tmp; + tmp = tmp->rb_left; + } else { + tmp = tmp->rb_right; + } + } + + /* + * Find the slot with the lowest gfn that can possibly intersect with + * the range, so we'll ideally have slot start <= range start + */ + if (iter->node) { + /* + * A NULL previous node means that the very first slot + * already has a higher start gfn. + * In this case slot start > range start. + */ + tmp = rb_prev(iter->node); + if (tmp) + iter->node = tmp; + } else { + /* a NULL node below means no slots */ + iter->node = rb_last(&slots->gfn_tree); + } + + if (iter->node) { + iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[idx]); + + /* + * It is possible in the slot start < range start case that the + * found slot ends before or at range start (slot end <= range start) + * and so it does not overlap the requested range. + * + * In such non-overlapping case the next slot (if it exists) will + * already have slot start > range start, otherwise the logic above + * would have found it instead of the current slot. + */ + if (iter->slot->base_gfn + iter->slot->npages <= start) + kvm_memslot_iter_next(iter); + } +} + +static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_t end) +{ + if (!iter->node) + return false; + + /* + * If this slot starts beyond or at the end of the range so does + * every next one + */ + return iter->slot->base_gfn < end; +} + +/* Iterate over each memslot at least partially intersecting [start, end) range */ +#define kvm_for_each_memslot_in_gfn_range(iter, slots, start, end) \ + for (kvm_memslot_iter_start(iter, slots, start); \ + kvm_memslot_iter_is_valid(iter, end); \ + kvm_memslot_iter_next(iter)) + /* * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: * - create a new memory slot -- cgit v1.2.3 From 8283e36abfff507c64fe8289ac30ea7ab59648aa Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 15 Nov 2021 15:45:58 -0800 Subject: KVM: x86/mmu: Propagate memslot const qualifier In preparation for implementing in-place hugepage promotion, various functions will need to be called from zap_collapsible_spte_range, which has the const qualifier on its memslot argument. Propagate the const qualifier to the various functions which will be needed. This just serves to simplify the following patch. No functional change intended. Signed-off-by: Ben Gardon Message-Id: <20211115234603.2908381-11-bgardon@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3bc98497e796..3eb7695aaa73 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -460,7 +460,7 @@ struct kvm_memory_slot { u16 as_id; }; -static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot) +static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot) { return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; } @@ -994,9 +994,9 @@ void kvm_set_page_accessed(struct page *page); kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); -kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, +kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); +kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn); +kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, bool *async, bool write_fault, bool *writable, hva_t *hva); @@ -1073,7 +1073,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); -void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); +void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 510958e997217e39a16b47afb5a44dfa39013964 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 8 Oct 2021 19:11:57 -0700 Subject: KVM: Force PPC to define its own rcuwait object Do not define/reference kvm_vcpu.wait if __KVM_HAVE_ARCH_WQP is true, and instead force the architecture (PPC) to define its own rcuwait object. Allowing common KVM to directly access vcpu->wait without a guard makes it all too easy to introduce potential bugs, e.g. kvm_vcpu_block(), kvm_vcpu_on_spin(), and async_pf_execute() all operate on vcpu->wait, not the result of kvm_arch_vcpu_get_wait(), and so may do the wrong thing for PPC. Due to PPC's shenanigans with respect to callbacks and waits (it switches to the virtual core's wait object at KVM_RUN!?!?), it's not clear whether or not this fixes any bugs. Signed-off-by: Sean Christopherson Message-Id: <20211009021236.4122790-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3eb7695aaa73..afacbfb2e482 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -314,7 +314,9 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; +#ifndef __KVM_HAVE_ARCH_WQP struct rcuwait wait; +#endif struct pid __rcu *pid; int sigset_active; sigset_t sigset; -- cgit v1.2.3 From 91b99ea7065786d0bff1c9281b002455dbaeb08b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 8 Oct 2021 19:12:06 -0700 Subject: KVM: Rename kvm_vcpu_block() => kvm_vcpu_halt() Rename kvm_vcpu_block() to kvm_vcpu_halt() in preparation for splitting the actual "block" sequences into a separate helper (to be named kvm_vcpu_block()). x86 will use the standalone block-only path to handle non-halt cases where the vCPU is not runnable. Rename block_ns to halt_ns to match the new function name. No functional change intended. Reviewed-by: David Matlack Reviewed-by: Christian Borntraeger Signed-off-by: Sean Christopherson Message-Id: <20211009021236.4122790-14-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index afacbfb2e482..ea3c22d55d56 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1102,7 +1102,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); -void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_vcpu_halt(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From fac4268894394213127e43856f41d10f29131e69 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 8 Oct 2021 19:12:07 -0700 Subject: KVM: Split out a kvm_vcpu_block() helper from kvm_vcpu_halt() Factor out the "block" part of kvm_vcpu_halt() so that x86 can emulate non-halt wait/sleep/block conditions that should not be subjected to halt-polling. No functional change intended. Reviewed-by: Christian Borntraeger Reviewed-by: David Matlack Signed-off-by: Sean Christopherson Message-Id: <20211009021236.4122790-15-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ea3c22d55d56..bd13c5b5bd1d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1103,6 +1103,7 @@ void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); void kvm_vcpu_halt(struct kvm_vcpu *vcpu); +bool kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From c3858335c711569b82a234a560dc19247e8f3fcc Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 8 Oct 2021 19:12:08 -0700 Subject: KVM: stats: Add stat to detect if vcpu is currently blocking Add a "blocking" stat that userspace can use to detect the case where a vCPU is not being run because of an vCPU/guest action, e.g. HLT or WFS on x86, WFI on arm64, etc... Current guest/host/halt stats don't show this well, e.g. if a guest halts for a long period of time then the vCPU could could appear pathologically blocked due to a host condition, when in reality the vCPU has been put into a not-runnable state by the guest. Originally-by: Cannon Matthews Suggested-by: Sean Christopherson Reviewed-by: David Matlack Signed-off-by: Jing Zhang [sean: renamed stat to "blocking", massaged changelog] Signed-off-by: Sean Christopherson Message-Id: <20211009021236.4122790-16-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 ++- include/linux/kvm_types.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bd13c5b5bd1d..dc7740cafea7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1587,7 +1587,8 @@ struct _kvm_stats_desc { STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist, \ HALT_POLL_HIST_COUNT), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \ - HALT_POLL_HIST_COUNT) + HALT_POLL_HIST_COUNT), \ + STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking) extern struct dentry *kvm_debugfs_dir; diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 234eab059839..888ef12862c9 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -87,6 +87,7 @@ struct kvm_vcpu_stat_generic { u64 halt_poll_success_hist[HALT_POLL_HIST_COUNT]; u64 halt_poll_fail_hist[HALT_POLL_HIST_COUNT]; u64 halt_wait_hist[HALT_POLL_HIST_COUNT]; + u64 blocking; }; #define KVM_STATS_NAME_SIZE 48 -- cgit v1.2.3 From d92a5d1c6c757f659ffb9c2c2e65fcf3d571c14e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 8 Oct 2021 19:12:12 -0700 Subject: KVM: Add helpers to wake/query blocking vCPU Add helpers to wake and query a blocking vCPU. In addition to providing nice names, the helpers reduce the probability of KVM neglecting to use kvm_arch_vcpu_get_wait(). No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20211009021236.4122790-20-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dc7740cafea7..f8ed799e8674 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1286,6 +1286,20 @@ static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu) #endif } +/* + * Wake a vCPU if necessary, but don't do any stats/metadata updates. Returns + * true if the vCPU was blocking and was awakened, false otherwise. + */ +static inline bool __kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) +{ + return !!rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)); +} + +static inline bool kvm_vcpu_is_blocking(struct kvm_vcpu *vcpu) +{ + return rcuwait_active(kvm_arch_vcpu_get_wait(vcpu)); +} + #ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED /* * returns true if the virtual interrupt controller is initialized and -- cgit v1.2.3 From 265d27caf95f8959ddd4a33e52c4b4dc4e31c308 Mon Sep 17 00:00:00 2001 From: Kavyasree Kotagiri Date: Wed, 3 Nov 2021 11:49:33 +0530 Subject: dt-bindings: clock: lan966x: Add binding includes for lan966x SoC clock IDs LAN966X supports 14 clock outputs for its peripherals. This include file is introduced to use identifiers for clocks. Signed-off-by: Kavyasree Kotagiri Acked-by: Rob Herring Acked-by: Nicolas Ferre Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20211103061935.25677-2-kavyasree.kotagiri@microchip.com --- include/dt-bindings/clock/microchip,lan966x.h | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/dt-bindings/clock/microchip,lan966x.h (limited to 'include') diff --git a/include/dt-bindings/clock/microchip,lan966x.h b/include/dt-bindings/clock/microchip,lan966x.h new file mode 100644 index 000000000000..fe36ed6d8b5f --- /dev/null +++ b/include/dt-bindings/clock/microchip,lan966x.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021 Microchip Inc. + * + * Author: Kavyasree Kotagiri + */ + +#ifndef _DT_BINDINGS_CLK_LAN966X_H +#define _DT_BINDINGS_CLK_LAN966X_H + +#define GCK_ID_QSPI0 0 +#define GCK_ID_QSPI1 1 +#define GCK_ID_QSPI2 2 +#define GCK_ID_SDMMC0 3 +#define GCK_ID_PI 4 +#define GCK_ID_MCAN0 5 +#define GCK_ID_MCAN1 6 +#define GCK_ID_FLEXCOM0 7 +#define GCK_ID_FLEXCOM1 8 +#define GCK_ID_FLEXCOM2 9 +#define GCK_ID_FLEXCOM3 10 +#define GCK_ID_FLEXCOM4 11 +#define GCK_ID_TIMER 12 +#define GCK_ID_USB_REFCLK 13 + +#define N_CLOCKS 14 + +#endif -- cgit v1.2.3 From 815f0e738a8d5663a02350e2580706829144a722 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 3 Nov 2021 09:50:59 +0100 Subject: clk: gate: Add devm_clk_hw_register_gate() Add devm_clk_hw_register_gate() - devres-managed version of clk_hw_register_gate() Suggested-by: Stephen Boyd Signed-off-by: Horatiu Vultur Acked-by: Nicolas Ferre Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20211103085102.1656081-2-horatiu.vultur@microchip.com --- include/linux/clk-provider.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index f59c875271a0..2faa6f7aa8a8 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -490,6 +490,13 @@ struct clk_hw *__clk_hw_register_gate(struct device *dev, unsigned long flags, void __iomem *reg, u8 bit_idx, u8 clk_gate_flags, spinlock_t *lock); +struct clk_hw *__devm_clk_hw_register_gate(struct device *dev, + struct device_node *np, const char *name, + const char *parent_name, const struct clk_hw *parent_hw, + const struct clk_parent_data *parent_data, + unsigned long flags, + void __iomem *reg, u8 bit_idx, + u8 clk_gate_flags, spinlock_t *lock); struct clk *clk_register_gate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 bit_idx, @@ -544,6 +551,22 @@ struct clk *clk_register_gate(struct device *dev, const char *name, __clk_hw_register_gate((dev), NULL, (name), NULL, NULL, (parent_data), \ (flags), (reg), (bit_idx), \ (clk_gate_flags), (lock)) +/** + * devm_clk_hw_register_gate - register a gate clock with the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_name: name of this clock's parent + * @flags: framework-specific flags for this clock + * @reg: register address to control gating of this clock + * @bit_idx: which bit in the register controls gating of this clock + * @clk_gate_flags: gate-specific flags for this clock + * @lock: shared register lock for this clock + */ +#define devm_clk_hw_register_gate(dev, name, parent_name, flags, reg, bit_idx,\ + clk_gate_flags, lock) \ + __devm_clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \ + NULL, (flags), (reg), (bit_idx), \ + (clk_gate_flags), (lock)) void clk_unregister_gate(struct clk *clk); void clk_hw_unregister_gate(struct clk_hw *hw); int clk_gate_is_enabled(struct clk_hw *hw); -- cgit v1.2.3 From 51d0a37dde9b63111d14a59bb77c5cf0273e4c9e Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 3 Nov 2021 09:51:01 +0100 Subject: dt-bindings: clock: lan966x: Extend includes with clock gates On lan966x it is allow to control the clock to some peripherals like USB. So extend the include file with these clocks. Acked-by: Rob Herring Signed-off-by: Horatiu Vultur Acked-by: Nicolas Ferre Signed-off-by: Nicolas Ferre Link: https://lore.kernel.org/r/20211103085102.1656081-4-horatiu.vultur@microchip.com --- include/dt-bindings/clock/microchip,lan966x.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/microchip,lan966x.h b/include/dt-bindings/clock/microchip,lan966x.h index fe36ed6d8b5f..6f9d43d76d5a 100644 --- a/include/dt-bindings/clock/microchip,lan966x.h +++ b/include/dt-bindings/clock/microchip,lan966x.h @@ -23,6 +23,12 @@ #define GCK_ID_TIMER 12 #define GCK_ID_USB_REFCLK 13 -#define N_CLOCKS 14 +/* Gate clocks */ +#define GCK_GATE_UHPHS 14 +#define GCK_GATE_UDPHS 15 +#define GCK_GATE_MCRAMC 16 +#define GCK_GATE_HMATRIX 17 + +#define N_CLOCKS 18 #endif -- cgit v1.2.3 From 7cfa3d00730a4c0694b55fb1974823baeab8815b Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 8 Dec 2021 18:17:18 +0800 Subject: ASoC: rt5682s: add delay time to fix pop sound issue There is a pop noise at the beginning of the capture data. This patch adds the delay time before stereo1 ADC unmute to fix the pop sound issue. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20211208101718.28945-1-shumingf@realtek.com Signed-off-by: Mark Brown --- include/sound/rt5682s.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/rt5682s.h b/include/sound/rt5682s.h index accfbc2dcdd2..f18d91308b9a 100644 --- a/include/sound/rt5682s.h +++ b/include/sound/rt5682s.h @@ -40,6 +40,7 @@ struct rt5682s_platform_data { enum rt5682s_jd_src jd_src; unsigned int dmic_clk_rate; unsigned int dmic_delay; + unsigned int amic_delay; bool dmic_clk_driving_high; const char *dai_clk_names[RT5682S_DAI_NUM_CLKS]; -- cgit v1.2.3 From 444dd878e85fb33fcfb2682cfdab4c236f33ea3e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Dec 2021 17:19:47 +0100 Subject: PM: runtime: Fix pm_runtime_active() kerneldoc comment The kerneldoc comment of pm_runtime_active() does not reflect the behavior of the function, so update it accordingly. Fixes: 403d2d116ec0 ("PM: runtime: Add kerneldoc comments to multiple helpers") Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson --- include/linux/pm_runtime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 222da43b7096..eddd66d426ca 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -129,7 +129,7 @@ static inline bool pm_runtime_suspended(struct device *dev) * pm_runtime_active - Check whether or not a device is runtime-active. * @dev: Target device. * - * Return %true if runtime PM is enabled for @dev and its runtime PM status is + * Return %true if runtime PM is disabled for @dev or its runtime PM status is * %RPM_ACTIVE, or %false otherwise. * * Note that the return value of this function can only be trusted if it is -- cgit v1.2.3 From 74d9555580c48a04b2c3b742dfb0c80777aa0b26 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 8 Nov 2021 16:09:41 +0000 Subject: PM: hibernate: Allow ACPI hardware signature to be honoured Theoretically, when the hardware signature in FACS changes, the OS is supposed to gracefully decline to attempt to resume from S4: "If the signature has changed, OSPM will not restore the system context and can boot from scratch" In practice, Windows doesn't do this and many laptop vendors do allow the signature to change especially when docking/undocking, so it would be a bad idea to simply comply with the specification by default in the general case. However, there are use cases where we do want the compliant behaviour and we know it's safe. Specifically, when resuming virtual machines where we know the hypervisor has changed sufficiently that resume will fail. We really want to be able to *tell* the guest kernel not to try, so it boots cleanly and doesn't just crash. This patch provides a way to opt in to the spec-compliant behaviour on the command line. A follow-up patch may do this automatically for certain "known good" machines based on a DMI match, or perhaps just for all hypervisor guests since there's no good reason a hypervisor would change the hardware_signature that it exposes to guests *unless* it wants them to obey the ACPI specification. Signed-off-by: David Woodhouse Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 2 +- include/linux/suspend.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b28f8790192a..6c0798db6bde 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -506,7 +506,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION -void __init acpi_no_s4_hw_signature(void); +void __init acpi_check_s4_hw_signature(int check); #endif #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8af13ba60c7e..5785d909c321 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -446,6 +446,7 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); +extern u32 swsusp_hardware_signature; extern void hibernation_set_ops(const struct platform_hibernation_ops *ops); extern int hibernate(void); extern bool system_entering_hibernation(void); -- cgit v1.2.3 From 8260b9820f7050461b8969305bbd8cb5654f0c74 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Mon, 6 Dec 2021 16:55:03 +0300 Subject: x86/sev: Use CC_ATTR attribute to generalize string I/O unroll INS/OUTS are not supported in TDX guests and cause #UD. Kernel has to avoid them when running in TDX guest. To support existing usage, string I/O operations are unrolled using IN/OUT instructions. AMD SEV platform implements this support by adding unroll logic in ins#bwl()/outs#bwl() macros with SEV-specific checks. Since TDX VM guests will also need similar support, use CC_ATTR_GUEST_UNROLL_STRING_IO and generic cc_platform_has() API to implement it. String I/O helpers were the last users of sev_key_active() interface and sev_enable_key static key. Remove them. [ bp: Move comment too and do not delete it. ] Suggested-by: Tom Lendacky Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Tom Lendacky Tested-by: Tom Lendacky Link: https://lkml.kernel.org/r/20211206135505.75045-2-kirill.shutemov@linux.intel.com --- include/linux/cc_platform.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index a075b70b9a70..efd8205282da 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -61,6 +61,17 @@ enum cc_attr { * Examples include SEV-ES. */ CC_ATTR_GUEST_STATE_ENCRYPT, + + /** + * @CC_ATTR_GUEST_UNROLL_STRING_IO: String I/O is implemented with + * IN/OUT instructions + * + * The platform/OS is running as a guest/virtual machine and uses + * IN/OUT instructions in place of string I/O. + * + * Examples include TDX guest & SEV. + */ + CC_ATTR_GUEST_UNROLL_STRING_IO, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM -- cgit v1.2.3 From 52407c220c44c8dcc6aa8aa35ffc8a2db3c849a9 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 2 Dec 2021 22:35:43 -0800 Subject: drm/i915/rpl-s: Add PCI IDS for Raptor Lake S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Raptor Lake S(RPL-S) is a version 12 Display, Media and Render. For all i915 purposes it is the same as Alder Lake S (ADL-S). Introduce RPL-S as a subplatform of ADL-S. This patch adds PCI ids for RPL-S. BSpec: 53655 Cc: x86@kernel.org Cc: dri-devel@lists.freedesktop.org Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Matt Roper Cc: Jani Nikula Signed-off-by: Anusha Srivatsa Reviewed-by: José Roberto de Souza Acked-by: Dave Hansen # arch/x86 Acked-by: Tvrtko Ursulin Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20211203063545.2254380-2-anusha.srivatsa@intel.com --- include/drm/i915_pciids.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index c00ac54692d7..baf3d1d3d566 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -666,4 +666,13 @@ INTEL_VGA_DEVICE(0x46C2, info), \ INTEL_VGA_DEVICE(0x46C3, info) +/* RPL-S */ +#define INTEL_RPLS_IDS(info) \ + INTEL_VGA_DEVICE(0xA780, info), \ + INTEL_VGA_DEVICE(0xA781, info), \ + INTEL_VGA_DEVICE(0xA782, info), \ + INTEL_VGA_DEVICE(0xA783, info), \ + INTEL_VGA_DEVICE(0xA788, info), \ + INTEL_VGA_DEVICE(0xA789, info) + #endif /* _I915_PCIIDS_H */ -- cgit v1.2.3 From 3f9bb0301d50ce27421eff4b710c2bbe58111a83 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 6 Dec 2021 18:57:47 +0200 Subject: net: dsa: make dp->bridge_num one-based MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I have seen too many bugs already due to the fact that we must encode an invalid dp->bridge_num as a negative value, because the natural tendency is to check that invalid value using (!dp->bridge_num). Latest example can be seen in commit 1bec0f05062c ("net: dsa: fix bridge_num not getting cleared after ports leaving the bridge"). Convert the existing users to assume that dp->bridge_num == 0 is the encoding for invalid, and valid bridge numbers start from 1. Signed-off-by: Vladimir Oltean Reviewed-by: Alvin Šipraga Signed-off-by: Jakub Kicinski --- include/linux/dsa/8021q.h | 6 +++--- include/net/dsa.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 254b165f2b44..0af4371fbebb 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -38,13 +38,13 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id); int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, struct net_device *br, - int bridge_num); + unsigned int bridge_num); void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, struct net_device *br, - int bridge_num); + unsigned int bridge_num); -u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num); +u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num); u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp); diff --git a/include/net/dsa.h b/include/net/dsa.h index 8ca9d50cbbc2..a23cfbaa09d6 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -257,7 +257,7 @@ struct dsa_port { bool learning; u8 stp_state; struct net_device *bridge_dev; - int bridge_num; + unsigned int bridge_num; struct devlink_port devlink_port; bool devlink_port_setup; struct phylink *pl; @@ -754,11 +754,11 @@ struct dsa_switch_ops { /* Called right after .port_bridge_join() */ int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port, struct net_device *bridge, - int bridge_num); + unsigned int bridge_num); /* Called right before .port_bridge_leave() */ void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port, struct net_device *bridge, - int bridge_num); + unsigned int bridge_num); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); -- cgit v1.2.3 From 947c8746e2c31bb469ba9ee02dc739be6a98ee38 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 6 Dec 2021 18:57:48 +0200 Subject: net: dsa: assign a bridge number even without TX forwarding offload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The service where DSA assigns a unique bridge number for each forwarding domain is useful even for drivers which do not implement the TX forwarding offload feature. For example, drivers might use the dp->bridge_num for FDB isolation. So rename ds->num_fwd_offloading_bridges to ds->max_num_bridges, and calculate a unique bridge_num for all drivers that set this value. Signed-off-by: Vladimir Oltean Reviewed-by: Alvin Šipraga Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index a23cfbaa09d6..00fbd87ae4ff 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -413,12 +413,12 @@ struct dsa_switch { */ unsigned int num_lag_ids; - /* Drivers that support bridge forwarding offload should set this to - * the maximum number of bridges spanning the same switch tree (or all - * trees, in the case of cross-tree bridging support) that can be - * offloaded. + /* Drivers that support bridge forwarding offload or FDB isolation + * should set this to the maximum number of bridges spanning the same + * switch tree (or all trees, in the case of cross-tree bridging + * support) that can be offloaded. */ - unsigned int num_fwd_offloading_bridges; + unsigned int max_num_bridges; size_t num_ports; }; -- cgit v1.2.3 From 36cbf39b56908bb2e7e8e392e5f08895c3ca4326 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 6 Dec 2021 18:57:52 +0200 Subject: net: dsa: hide dp->bridge_dev and dp->bridge_num in the core behind helpers The location of the bridge device pointer and number is going to change. It is not going to be kept individually per port, but in a common structure allocated dynamically and which will have lockdep validation. Create helpers to access these elements so that we have a migration path to the new organization. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 00fbd87ae4ff..18bce0383267 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -599,6 +599,27 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) return dp->slave; } +static inline struct net_device * +dsa_port_bridge_dev_get(const struct dsa_port *dp) +{ + return dp->bridge_dev; +} + +static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp) +{ + return dp->bridge_num; +} + +static inline bool dsa_port_bridge_same(const struct dsa_port *a, + const struct dsa_port *b) +{ + struct net_device *br_a = dsa_port_bridge_dev_get(a); + struct net_device *br_b = dsa_port_bridge_dev_get(b); + + /* Standalone ports are not in the same bridge with one another */ + return (!br_a || !br_b) ? false : (br_a == br_b); +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { -- cgit v1.2.3 From 6a43cba3034015d1c029c8a81b62eb9c2660fd6e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 6 Dec 2021 18:57:55 +0200 Subject: net: dsa: export bridging offload helpers to drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the static inline helpers from net/dsa/dsa_priv.h to include/net/dsa.h, so that drivers can call functions such as dsa_port_offloads_bridge_dev(), which will be necessary after the transition to a more complex bridge structure. More functions than are needed right now are being moved, but this is done for uniformity. Signed-off-by: Vladimir Oltean Reviewed-by: Alvin Šipraga Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 18bce0383267..899e13d56fc2 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -620,6 +620,49 @@ static inline bool dsa_port_bridge_same(const struct dsa_port *a, return (!br_a || !br_b) ? false : (br_a == br_b); } +static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, + const struct net_device *dev) +{ + return dsa_port_to_bridge_port(dp) == dev; +} + +static inline bool +dsa_port_offloads_bridge_dev(struct dsa_port *dp, + const struct net_device *bridge_dev) +{ + /* DSA ports connected to a bridge, and event was emitted + * for the bridge. + */ + return dsa_port_bridge_dev_get(dp) == bridge_dev; +} + +/* Returns true if any port of this tree offloads the given net_device */ +static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, + const struct net_device *dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge_port(dp, dev)) + return true; + + return false; +} + +/* Returns true if any port of this tree offloads the given bridge */ +static inline bool +dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, + const struct net_device *bridge_dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge_dev(dp, bridge_dev)) + return true; + + return false; +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { -- cgit v1.2.3 From d3eed0e57d5d1bcbf1bd60f83a4adfe7d7b8dd9c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 6 Dec 2021 18:57:56 +0200 Subject: net: dsa: keep the bridge_dev and bridge_num as part of the same structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main desire behind this is to provide coherent bridge information to the fast path without locking. For example, right now we set dp->bridge_dev and dp->bridge_num from separate code paths, it is theoretically possible for a packet transmission to read these two port properties consecutively and find a bridge number which does not correspond with the bridge device. Another desire is to start passing more complex bridge information to dsa_switch_ops functions. For example, with FDB isolation, it is expected that drivers will need to be passed the bridge which requested an FDB/MDB entry to be offloaded, and along with that bridge_dev, the associated bridge_num should be passed too, in case the driver might want to implement an isolation scheme based on that number. We already pass the {bridge_dev, bridge_num} pair to the TX forwarding offload switch API, however we'd like to remove that and squash it into the basic bridge join/leave API. So that means we need to pass this pair to the bridge join/leave API. During dsa_port_bridge_leave, first we unset dp->bridge_dev, then we call the driver's .port_bridge_leave with what used to be our dp->bridge_dev, but provided as an argument. When bridge_dev and bridge_num get folded into a single structure, we need to preserve this behavior in dsa_port_bridge_leave: we need a copy of what used to be in dp->bridge. Switch drivers check bridge membership by comparing dp->bridge_dev with the provided bridge_dev, but now, if we provide the struct dsa_bridge as a pointer, they cannot keep comparing dp->bridge to the provided pointer, since this only points to an on-stack copy. To make this obvious and prevent driver writers from forgetting and doing stupid things, in this new API, the struct dsa_bridge is provided as a full structure (not very large, contains an int and a pointer) instead of a pointer. An explicit comparison function needs to be used to determine bridge membership: dsa_port_offloads_bridge(). Signed-off-by: Vladimir Oltean Reviewed-by: Alvin Šipraga Signed-off-by: Jakub Kicinski --- include/linux/dsa/8021q.h | 7 +++---- include/net/dsa.h | 34 +++++++++++++++++++++------------- 2 files changed, 24 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 0af4371fbebb..939a1beaddf7 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -7,6 +7,7 @@ #include #include +#include struct dsa_switch; struct dsa_port; @@ -37,12 +38,10 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id); int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, - struct net_device *br, - unsigned int bridge_num); + struct dsa_bridge bridge); void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, - struct net_device *br, - unsigned int bridge_num); + struct dsa_bridge bridge); u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num); diff --git a/include/net/dsa.h b/include/net/dsa.h index 899e13d56fc2..b9789c0cd5e3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -219,6 +219,11 @@ struct dsa_mall_tc_entry { }; }; +struct dsa_bridge { + struct net_device *dev; + unsigned int num; + refcount_t refcount; +}; struct dsa_port { /* A CPU port is physically connected to a master device. @@ -256,8 +261,7 @@ struct dsa_port { /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ bool learning; u8 stp_state; - struct net_device *bridge_dev; - unsigned int bridge_num; + struct dsa_bridge *bridge; struct devlink_port devlink_port; bool devlink_port_setup; struct phylink *pl; @@ -588,7 +592,7 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { - if (!dp->bridge_dev) + if (!dp->bridge) return NULL; if (dp->lag_dev) @@ -602,12 +606,12 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) static inline struct net_device * dsa_port_bridge_dev_get(const struct dsa_port *dp) { - return dp->bridge_dev; + return dp->bridge ? dp->bridge->dev : NULL; } static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp) { - return dp->bridge_num; + return dp->bridge ? dp->bridge->num : 0; } static inline bool dsa_port_bridge_same(const struct dsa_port *a, @@ -636,6 +640,12 @@ dsa_port_offloads_bridge_dev(struct dsa_port *dp, return dsa_port_bridge_dev_get(dp) == bridge_dev; } +static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, + const struct dsa_bridge *bridge) +{ + return dsa_port_bridge_dev_get(dp) == bridge->dev; +} + /* Returns true if any port of this tree offloads the given net_device */ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, const struct net_device *dev) @@ -812,17 +822,15 @@ struct dsa_switch_ops { */ int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, - struct net_device *bridge); + struct dsa_bridge bridge); void (*port_bridge_leave)(struct dsa_switch *ds, int port, - struct net_device *bridge); + struct dsa_bridge bridge); /* Called right after .port_bridge_join() */ int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port, - struct net_device *bridge, - unsigned int bridge_num); + struct dsa_bridge bridge); /* Called right before .port_bridge_leave() */ void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port, - struct net_device *bridge, - unsigned int bridge_num); + struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); @@ -894,10 +902,10 @@ struct dsa_switch_ops { */ int (*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct net_device *br); + struct dsa_bridge bridge); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct net_device *br); + struct dsa_bridge bridge); int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, -- cgit v1.2.3 From b079922ba2acf072b23d82fa246a0d8de198f0a2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 6 Dec 2021 18:57:57 +0200 Subject: net: dsa: add a "tx_fwd_offload" argument to ->port_bridge_join MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a preparation patch for the removal of the DSA switch methods ->port_bridge_tx_fwd_offload() and ->port_bridge_tx_fwd_unoffload(). The plan is for the switch to report whether it offloads TX forwarding directly as a response to the ->port_bridge_join() method. This change deals with the noisy portion of converting all existing function prototypes to take this new boolean pointer argument. The bool is placed in the cross-chip notifier structure for bridge join, and a reference to it is provided to drivers. In the next change, DSA will then actually look at this value instead of calling ->port_bridge_tx_fwd_offload(). Signed-off-by: Vladimir Oltean Reviewed-by: Alvin Šipraga Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index b9789c0cd5e3..584b3f9462a0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -822,7 +822,8 @@ struct dsa_switch_ops { */ int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); + struct dsa_bridge bridge, + bool *tx_fwd_offload); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct dsa_bridge bridge); /* Called right after .port_bridge_join() */ -- cgit v1.2.3 From 857fdd74fb38dad4d0333401fb1826cd8cf0df2c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 6 Dec 2021 18:57:58 +0200 Subject: net: dsa: eliminate dsa_switch_ops :: port_bridge_tx_fwd_{,un}offload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't really need new switch API for these, and with new switches which intend to add support for this feature, it will become cumbersome to maintain. The change consists in restructuring the two drivers that implement this offload (sja1105 and mv88e6xxx) such that the offload is enabled and disabled from the ->port_bridge_{join,leave} methods instead of the old ->port_bridge_tx_fwd_{,un}offload. The only non-trivial change is that mv88e6xxx_map_virtual_bridge_to_pvt() has been moved to avoid a forward declaration, and the mv88e6xxx_reg_lock() calls from inside it have been removed, since locking is now done from mv88e6xxx_port_bridge_{join,leave}. Signed-off-by: Vladimir Oltean Reviewed-by: Alvin Šipraga Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 584b3f9462a0..bdf308a5c55e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -222,6 +222,7 @@ struct dsa_mall_tc_entry { struct dsa_bridge { struct net_device *dev; unsigned int num; + bool tx_fwd_offload; refcount_t refcount; }; @@ -826,12 +827,6 @@ struct dsa_switch_ops { bool *tx_fwd_offload); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct dsa_bridge bridge); - /* Called right after .port_bridge_join() */ - int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); - /* Called right before .port_bridge_leave() */ - void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); -- cgit v1.2.3 From 283e6f5a8166f075eba78da6b867d76cc5d47e77 Mon Sep 17 00:00:00 2001 From: Sergey Ryazanov Date: Tue, 7 Dec 2021 12:21:40 +0300 Subject: net: wwan: make debugfs optional Debugfs interface is optional for the regular modem use. Some distros and users will want to disable this feature for security or kernel size reasons. So add a configuration option that allows to completely disable the debugfs interface of the WWAN devices. A primary considered use case for this option was embedded firmwares. For example, in OpenWrt, you can not completely disable debugfs, as a lot of wireless stuff can only be configured and monitored with the debugfs knobs. At the same time, reducing the size of a kernel and modules is an essential task in the world of embedded software. Disabling the WWAN and IOSM debugfs interfaces allows us to save 50K (x86-64 build) of space for module storage. Not much, but already considerable when you only have 16MB of storage. So it is hard to just disable whole debugfs. Users need some fine grained set of options to control which debugfs interface is important and should be available and which is not. The new configuration symbol is enabled by default and is hidden under the EXPERT option. So a regular user would not be bothered by another one configuration question. While an embedded distro maintainer will be able to a little more reduce the final image size. Signed-off-by: Sergey Ryazanov Reviewed-by: Leon Romanovsky Reviewed-by: Loic Poulain Acked-by: M Chetan Kumar Signed-off-by: Jakub Kicinski --- include/linux/wwan.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 1646aa3e6779..e143c88bf4b0 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -171,6 +171,13 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops, void wwan_unregister_ops(struct device *parent); +#ifdef CONFIG_WWAN_DEBUGFS struct dentry *wwan_get_debugfs_dir(struct device *parent); +#else +static inline struct dentry *wwan_get_debugfs_dir(struct device *parent) +{ + return ERR_PTR(-ENODEV); +} +#endif #endif /* __WWAN_H */ -- cgit v1.2.3 From 469407a3b5ed9390cfacb0363d1cc926a51f6a14 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 9 Dec 2021 09:29:18 +0800 Subject: erofs: clean up erofs_map_blocks tracepoints Since the new type of chunk-based files is introduced, there is no need to leave flatmode tracepoints. Rename to erofs_map_blocks instead. Link: https://lore.kernel.org/r/20211209012918.30337-1-hsiangkao@linux.alibaba.com Reviewed-by: Yue Hu Signed-off-by: Gao Xiang --- include/trace/events/erofs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index 16ae7b666810..57de057bd503 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -169,7 +169,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter, __entry->flags ? show_map_flags(__entry->flags) : "NULL") ); -DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter, +DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_enter, TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, unsigned flags), @@ -221,7 +221,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit, show_mflags(__entry->mflags), __entry->ret) ); -DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit, +DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_exit, TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, unsigned flags, int ret), -- cgit v1.2.3 From 1197528aaea79ed4909aba695d18fdecc5387a36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:28 +0100 Subject: genirq/msi: Guard sysfs code No point in building unused code when CONFIG_SYSFS=n. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211206210223.985907940@linutronix.de --- include/linux/msi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index e616f94c7c58..d43b9469c88b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -239,9 +239,19 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void pci_msi_mask_irq(struct irq_data *data); void pci_msi_unmask_irq(struct irq_data *data); +#ifdef CONFIG_SYSFS const struct attribute_group **msi_populate_sysfs(struct device *dev); void msi_destroy_sysfs(struct device *dev, const struct attribute_group **msi_irq_groups); +#else +static inline const struct attribute_group **msi_populate_sysfs(struct device *dev) +{ + return NULL; +} +static inline void msi_destroy_sysfs(struct device *dev, const struct attribute_group **msi_irq_groups) +{ +} +#endif /* * The arch hooks to setup up msi irqs. Default functions are implemented -- cgit v1.2.3 From 1dd2c6a0817fd08f80dee75d7d3bd99a0c4b828d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:29 +0100 Subject: genirq/msi: Remove unused domain callbacks No users and there is no need to grow them. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211126223824.322987915@linutronix.de Link: https://lore.kernel.org/r/20211206210224.041777889@linutronix.de --- include/linux/msi.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index d43b9469c88b..4b962f73f84a 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -304,7 +304,6 @@ struct msi_domain_info; * @msi_free: Domain specific function to free a MSI interrupts * @msi_check: Callback for verification of the domain/info/dev data * @msi_prepare: Prepare the allocation of the interrupts in the domain - * @msi_finish: Optional callback to finalize the allocation * @set_desc: Set the msi descriptor for an interrupt * @handle_error: Optional error handler if the allocation fails * @domain_alloc_irqs: Optional function to override the default allocation @@ -312,12 +311,11 @@ struct msi_domain_info; * @domain_free_irqs: Optional function to override the default free * function. * - * @get_hwirq, @msi_init and @msi_free are callbacks used by - * msi_create_irq_domain() and related interfaces + * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying + * irqdomain. * - * @msi_check, @msi_prepare, @msi_finish, @set_desc and @handle_error - * are callbacks used by msi_domain_alloc_irqs() and related - * interfaces which are based on msi_desc. + * @msi_check, @msi_prepare, @handle_error and @set_desc are callbacks used by + * msi_domain_alloc/free_irqs(). * * @domain_alloc_irqs, @domain_free_irqs can be used to override the * default allocation/free functions (__msi_domain_alloc/free_irqs). This @@ -351,7 +349,6 @@ struct msi_domain_ops { int (*msi_prepare)(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); - void (*msi_finish)(msi_alloc_info_t *arg, int retval); void (*set_desc)(msi_alloc_info_t *arg, struct msi_desc *desc); int (*handle_error)(struct irq_domain *domain, -- cgit v1.2.3 From 3ba1f050c91d5ce3672dbf3a55dc2451c0b342e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:31 +0100 Subject: genirq/msi: Fixup includes Remove the kobject.h include from msi.h as it's not required and add a sysfs.h include to the core code instead. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211206210224.103502021@linutronix.de --- include/linux/msi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 4b962f73f84a..5c627750f269 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -2,7 +2,7 @@ #ifndef LINUX_MSI_H #define LINUX_MSI_H -#include +#include #include #include -- cgit v1.2.3 From 9e8688c5f2990dadcf83728cd00a7e8497fc6aa9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:33 +0100 Subject: PCI/MSI: Make pci_msi_domain_write_msg() static There is no point to have this function public as it is set by the PCI core anyway when a PCI/MSI irqdomain is created. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas # PCI Link: https://lore.kernel.org/r/20211206210224.157070464@linutronix.de --- include/linux/msi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 5c627750f269..d7b143a79cb4 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -455,7 +455,6 @@ void *platform_msi_get_host_data(struct irq_domain *domain); #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN -void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -- cgit v1.2.3 From ade044a3d0f0389e4f916337c505550acc3fd011 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:34 +0100 Subject: PCI/MSI: Remove msi_desc_to_pci_sysdata() Last user is gone long ago. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20211206210224.210768199@linutronix.de --- include/linux/msi.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index d7b143a79cb4..ac6fec105edc 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -218,13 +218,8 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, for_each_msi_entry((desc), &(pdev)->dev) struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); -void *msi_desc_to_pci_sysdata(struct msi_desc *desc); void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); #else /* CONFIG_PCI_MSI */ -static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc) -{ - return NULL; -} static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) { } -- cgit v1.2.3 From e58f2259b91c02974c20db7b28d39d810a21249b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:39 +0100 Subject: genirq/msi, treewide: Use a named struct for PCI/MSI attributes The unnamed struct sucks and is in the way of further cleanups. Stick the PCI related MSI data into a real data structure and cleanup all users. No functional change. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Reviewed-by: Greg Kroah-Hartman Acked-by: Kalle Valo Link: https://lore.kernel.org/r/20211206210224.374863119@linutronix.de --- include/linux/msi.h | 84 ++++++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index ac6fec105edc..7e5c13f4e41b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -68,6 +68,42 @@ static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, struct msi_msg *msg); +/** + * pci_msi_desc - PCI/MSI specific MSI descriptor data + * + * @msi_mask: [PCI MSI] MSI cached mask bits + * @msix_ctrl: [PCI MSI-X] MSI-X cached per vector control bits + * @is_msix: [PCI MSI/X] True if MSI-X + * @multiple: [PCI MSI/X] log2 num of messages allocated + * @multi_cap: [PCI MSI/X] log2 num of messages supported + * @can_mask: [PCI MSI/X] Masking supported? + * @is_64: [PCI MSI/X] Address size: 0=32bit 1=64bit + * @entry_nr: [PCI MSI/X] Entry which is described by this descriptor + * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq + * @mask_pos: [PCI MSI] Mask register position + * @mask_base: [PCI MSI-X] Mask register base address + */ +struct pci_msi_desc { + union { + u32 msi_mask; + u32 msix_ctrl; + }; + struct { + u8 is_msix : 1; + u8 multiple : 3; + u8 multi_cap : 3; + u8 can_mask : 1; + u8 is_64 : 1; + u8 is_virtual : 1; + u16 entry_nr; + unsigned default_irq; + } msi_attrib; + union { + u8 mask_pos; + void __iomem *mask_base; + }; +}; + /** * platform_msi_desc - Platform device specific msi descriptor data * @msi_priv_data: Pointer to platform private data @@ -107,17 +143,7 @@ struct ti_sci_inta_msi_desc { * address or data changes * @write_msi_msg_data: Data parameter for the callback. * - * @msi_mask: [PCI MSI] MSI cached mask bits - * @msix_ctrl: [PCI MSI-X] MSI-X cached per vector control bits - * @is_msix: [PCI MSI/X] True if MSI-X - * @multiple: [PCI MSI/X] log2 num of messages allocated - * @multi_cap: [PCI MSI/X] log2 num of messages supported - * @maskbit: [PCI MSI/X] Mask-Pending bit supported? - * @is_64: [PCI MSI/X] Address size: 0=32bit 1=64bit - * @entry_nr: [PCI MSI/X] Entry which is described by this descriptor - * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq - * @mask_pos: [PCI MSI] Mask register position - * @mask_base: [PCI MSI-X] Mask register base address + * @pci: [PCI] PCI speficic msi descriptor data * @platform: [platform] Platform device specific msi descriptor data * @fsl_mc: [fsl-mc] FSL MC device specific msi descriptor data * @inta: [INTA] TISCI based INTA specific msi descriptor data @@ -138,38 +164,10 @@ struct msi_desc { void *write_msi_msg_data; union { - /* PCI MSI/X specific data */ - struct { - union { - u32 msi_mask; - u32 msix_ctrl; - }; - struct { - u8 is_msix : 1; - u8 multiple : 3; - u8 multi_cap : 3; - u8 can_mask : 1; - u8 is_64 : 1; - u8 is_virtual : 1; - u16 entry_nr; - unsigned default_irq; - } msi_attrib; - union { - u8 mask_pos; - void __iomem *mask_base; - }; - }; - - /* - * Non PCI variants add their data structure here. New - * entries need to use a named structure. We want - * proper name spaces for this. The PCI part is - * anonymous for now as it would require an immediate - * tree wide cleanup. - */ - struct platform_msi_desc platform; - struct fsl_mc_msi_desc fsl_mc; - struct ti_sci_inta_msi_desc inta; + struct pci_msi_desc pci; + struct platform_msi_desc platform; + struct fsl_mc_msi_desc fsl_mc; + struct ti_sci_inta_msi_desc inta; }; }; -- cgit v1.2.3 From ae72f3156729541581f526b85883ca53a20df2fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:42 +0100 Subject: PCI/MSI: Make arch_restore_msi_irqs() less horrible. Make arch_restore_msi_irqs() return a boolean which indicates whether the core code should restore the MSI message or not. Get rid of the indirection in x86. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas # PCI Link: https://lore.kernel.org/r/20211206210224.485668098@linutronix.de --- include/linux/msi.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 7e5c13f4e41b..673899fc24f6 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -273,11 +273,10 @@ static inline void arch_teardown_msi_irqs(struct pci_dev *dev) #endif /* - * The restore hooks are still available as they are useful even - * for fully irq domain based setups. Courtesy to XEN/X86. + * The restore hook is still available even for fully irq domain based + * setups. Courtesy to XEN/X86. */ -void arch_restore_msi_irqs(struct pci_dev *dev); -void default_restore_msi_irqs(struct pci_dev *dev); +bool arch_restore_msi_irqs(struct pci_dev *dev); #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN -- cgit v1.2.3 From aa423ac4221abdfb8588751e7838ca5f42f56db3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:52 +0100 Subject: PCI/MSI: Split out irqdomain code Move the irqdomain specific code into its own file. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Reviewed-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20211206210224.817754783@linutronix.de --- include/linux/msi.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 673899fc24f6..7ff7cf23b78d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -259,17 +259,6 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); void arch_teardown_msi_irq(unsigned int irq); int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void arch_teardown_msi_irqs(struct pci_dev *dev); -#else -static inline int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - WARN_ON_ONCE(1); - return -ENODEV; -} - -static inline void arch_teardown_msi_irqs(struct pci_dev *dev) -{ - WARN_ON_ONCE(1); -} #endif /* -- cgit v1.2.3 From 85aa607e79f8343f1ea028b29bdf8b6bc99c729a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:54 +0100 Subject: PCI/MSI: Sanitize MSI-X table map handling Unmapping the MSI-X base mapping in the loops which allocate/free MSI descriptors is daft and in the way of allowing runtime expansion of MSI-X descriptors. Store the mapping in struct pci_dev and free it after freeing the MSI-X descriptors. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20211206210224.871651518@linutronix.de --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 18a75c8e615c..8cb103677f5a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -473,6 +473,7 @@ struct pci_dev { u8 ptm_granularity; #endif #ifdef CONFIG_PCI_MSI + void __iomem *msix_base; const struct attribute_group **msi_irq_groups; #endif struct pci_vpd vpd; -- cgit v1.2.3 From cd119b09a87d8beb50356d8c5c6aa42d89c44eb7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:56 +0100 Subject: PCI/MSI: Move msi_lock to struct pci_dev It's only required for PCI/MSI. So no point in having it in every struct device. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20211206210224.925241961@linutronix.de --- include/linux/device.h | 2 -- include/linux/pci.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index e270cb740b9e..2a22875238a6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -407,7 +407,6 @@ struct dev_links_info { * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pin-control.rst for details. - * @msi_lock: Lock to protect MSI mask cache and mask register * @msi_list: Hosts MSI descriptors * @msi_domain: The generic MSI domain this device is using. * @numa_node: NUMA node this device is close to. @@ -508,7 +507,6 @@ struct device { struct dev_pin_info *pins; #endif #ifdef CONFIG_GENERIC_MSI_IRQ - raw_spinlock_t msi_lock; struct list_head msi_list; #endif #ifdef CONFIG_DMA_OPS diff --git a/include/linux/pci.h b/include/linux/pci.h index 8cb103677f5a..5cc46baef519 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -474,6 +474,7 @@ struct pci_dev { #endif #ifdef CONFIG_PCI_MSI void __iomem *msix_base; + raw_spinlock_t msi_lock; const struct attribute_group **msi_irq_groups; #endif struct pci_vpd vpd; -- cgit v1.2.3 From 57ce3a3c99b21e9c4f951ef01e0a3603c987c259 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:57 +0100 Subject: PCI/MSI: Make pci_msi_domain_check_cap() static No users outside of that file. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Reviewed-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20211206210224.980989243@linutronix.de --- include/linux/msi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 7ff7cf23b78d..5248678e05d1 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -439,8 +439,6 @@ void *platform_msi_get_host_data(struct irq_domain *domain); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int pci_msi_domain_check_cap(struct irq_domain *domain, - struct msi_domain_info *info, struct device *dev); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); bool pci_dev_has_special_msi_domain(struct pci_dev *pdev); -- cgit v1.2.3 From 890337624e1fa2da079fc1c036a62d178c985280 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:27:59 +0100 Subject: genirq/msi: Handle PCI/MSI allocation fail in core code Get rid of yet another irqdomain callback and let the core code return the already available information of how many descriptors could be allocated. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Jason Gunthorpe Reviewed-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas # PCI Link: https://lore.kernel.org/r/20211206210225.046615302@linutronix.de --- include/linux/msi.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 5248678e05d1..ba4a39c430b5 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -286,7 +286,6 @@ struct msi_domain_info; * @msi_check: Callback for verification of the domain/info/dev data * @msi_prepare: Prepare the allocation of the interrupts in the domain * @set_desc: Set the msi descriptor for an interrupt - * @handle_error: Optional error handler if the allocation fails * @domain_alloc_irqs: Optional function to override the default allocation * function. * @domain_free_irqs: Optional function to override the default free @@ -295,7 +294,7 @@ struct msi_domain_info; * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. * - * @msi_check, @msi_prepare, @handle_error and @set_desc are callbacks used by + * @msi_check, @msi_prepare and @set_desc are callbacks used by * msi_domain_alloc/free_irqs(). * * @domain_alloc_irqs, @domain_free_irqs can be used to override the @@ -332,8 +331,6 @@ struct msi_domain_ops { msi_alloc_info_t *arg); void (*set_desc)(msi_alloc_info_t *arg, struct msi_desc *desc); - int (*handle_error)(struct irq_domain *domain, - struct msi_desc *desc, int error); int (*domain_alloc_irqs)(struct irq_domain *domain, struct device *dev, int nvec); void (*domain_free_irqs)(struct irq_domain *domain, -- cgit v1.2.3 From ae710a458f0af6ba2b991ebdddffc66e8dbd765a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Nov 2021 12:24:23 +0200 Subject: drm: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20211110102423.54282-1-andriy.shevchenko@linux.intel.com --- include/drm/drm_gem_ttm_helper.h | 2 +- include/drm/drm_gem_vram_helper.h | 2 +- include/drm/drm_mm.h | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem_ttm_helper.h b/include/drm/drm_gem_ttm_helper.h index c1aa02bd4c89..78040f6cc6f3 100644 --- a/include/drm/drm_gem_ttm_helper.h +++ b/include/drm/drm_gem_ttm_helper.h @@ -3,7 +3,7 @@ #ifndef DRM_GEM_TTM_HELPER_H #define DRM_GEM_TTM_HELPER_H -#include +#include #include #include diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h index d3cf06c9af65..b4ce27a72773 100644 --- a/include/drm/drm_gem_vram_helper.h +++ b/include/drm/drm_gem_vram_helper.h @@ -11,8 +11,8 @@ #include #include +#include #include -#include /* for container_of() */ struct drm_mode_create_dumb; struct drm_plane; diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 9b4292f229c6..ac33ba1b18bc 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -39,13 +39,15 @@ */ #include #include -#include +#include #include #include #include #ifdef CONFIG_DRM_DEBUG_MM #include #endif +#include + #include #ifdef CONFIG_DRM_DEBUG_MM -- cgit v1.2.3 From 50468e4313355b161cac8a5155a45832995b7f25 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 16 Nov 2021 18:21:16 +0200 Subject: x86/sgx: Add an attribute for the amount of SGX memory in a NUMA node == Problem == The amount of SGX memory on a system is determined by the BIOS and it varies wildly between systems. It can be as small as dozens of MB's and as large as many GB's on servers. Just like how applications need to know how much regular RAM is available, enclave builders need to know how much SGX memory an enclave can consume. == Solution == Introduce a new sysfs file: /sys/devices/system/node/nodeX/x86/sgx_total_bytes to enumerate the amount of SGX memory available in each NUMA node. This serves the same function for SGX as /proc/meminfo or /sys/devices/system/node/nodeX/meminfo does for normal RAM. 'sgx_total_bytes' is needed today to help drive the SGX selftests. SGX-specific swap code is exercised by creating overcommitted enclaves which are larger than the physical SGX memory on the system. They currently use a CPUID-based approach which can diverge from the actual amount of SGX memory available. 'sgx_total_bytes' ensures that the selftests can work efficiently and do not attempt stupid things like creating a 100,000 MB enclave on a system with 128 MB of SGX memory. == Implementation Details == Introduce CONFIG_HAVE_ARCH_NODE_DEV_GROUP opt-in flag to expose an arch specific attribute group, and add an attribute for the amount of SGX memory in bytes to each NUMA node: == ABI Design Discussion == As opposed to the per-node ABI, a single, global ABI was considered. However, this would prevent enclaves from being able to size themselves so that they fit on a single NUMA node. Essentially, a single value would rule out NUMA optimizations for enclaves. Create a new "x86/" directory inside each "nodeX/" sysfs directory. 'sgx_total_bytes' is expected to be the first of at least a few sgx-specific files to be placed in the new directory. Just scanning /proc/meminfo, these are the no-brainers that we have for RAM, but we need for SGX: MemTotal: xxxx kB // sgx_total_bytes (implemented here) MemFree: yyyy kB // sgx_free_bytes SwapTotal: zzzz kB // sgx_swapped_bytes So, at *least* three. I think we will eventually end up needing something more along the lines of a dozen. A new directory (as opposed to being in the nodeX/ "root") directory avoids cluttering the root with several "sgx_*" files. Place the new file in a new "nodeX/x86/" directory because SGX is highly x86-specific. It is very unlikely that any other architecture (or even non-Intel x86 vendor) will ever implement SGX. Using "sgx/" as opposed to "x86/" was also considered. But, there is a real chance this can get used for other arch-specific purposes. [ dhansen: rewrite changelog ] Signed-off-by: Jarkko Sakkinen Signed-off-by: Dave Hansen Acked-by: Greg Kroah-Hartman Acked-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211116162116.93081-2-jarkko@kernel.org --- include/linux/numa.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/numa.h b/include/linux/numa.h index cb44cfe2b725..59df211d051f 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -58,4 +58,8 @@ static inline int phys_to_target_node(u64 start) } #endif +#ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP +extern const struct attribute_group arch_node_dev_group; +#endif + #endif /* _LINUX_NUMA_H */ -- cgit v1.2.3 From 67b967ddd93d0ed57d392a00f6f90060f0910c0e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 18 Nov 2021 12:46:59 +0100 Subject: mtd: Introduce an expert mode for forensics and debugging purposes When developping NAND controller drivers or when debugging filesystem corruptions, it is quite common to need hacking locally into the MTD/NAND core in order to get access to the content of the bad blocks. Instead of having multiple implementations out there let's provide a simple yet effective specific MTD-wide debugfs entry to fully disable these checks on purpose. A warning is added to inform the user when this mode gets enabled. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211118114659.1282855-1-miquel.raynal@bootlin.com --- include/linux/mtd/mtd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index f5e7dfc2e4e9..1ffa933121f6 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -711,4 +711,7 @@ static inline int mtd_is_bitflip_or_eccerr(int err) { unsigned mtd_mmap_capabilities(struct mtd_info *mtd); +extern char *mtd_expert_analysis_warning; +extern bool mtd_expert_analysis_mode; + #endif /* __MTD_MTD_H__ */ -- cgit v1.2.3 From cab2d3fd6866e089b5c50db09dece131f85bfebd Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 9 Dec 2021 18:46:33 +0530 Subject: bus: mhi: core: Add support for forced PM resume For whatever reason, some devices like QCA6390, WCN6855 using ath11k are not in M3 state during PM resume, but still functional. The mhi_pm_resume should then not fail in those cases, and let the higher level device specific stack continue resuming process. Add an API mhi_pm_resume_force(), to force resuming irrespective of the current MHI state. This fixes a regression with non functional ath11k WiFi after suspend/resume cycle on some machines. Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179 Link: https://lore.kernel.org/regressions/871r5p0x2u.fsf@codeaurora.org/ Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state") Cc: stable@vger.kernel.org #5.13 Reported-by: Kalle Valo Reported-by: Pengyu Ma Tested-by: Kalle Valo Acked-by: Kalle Valo Signed-off-by: Loic Poulain [mani: Switched to API, added bug report, reported-by tags and CCed stable] Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20211209131633.4168-1-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 723985879035..a5cc4cdf9cc8 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -663,6 +663,19 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl); */ int mhi_pm_resume(struct mhi_controller *mhi_cntrl); +/** + * mhi_pm_resume_force - Force resume MHI from suspended state + * @mhi_cntrl: MHI controller + * + * Resume the device irrespective of its MHI state. As per the MHI spec, devices + * has to be in M3 state during resume. But some devices seem to be in a + * different MHI state other than M3 but they continue working fine if allowed. + * This API is intented to be used for such devices. + * + * Return: 0 if the resume succeeds, a negative error code otherwise + */ +int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl); + /** * mhi_download_rddm_image - Download ramdump image from device for * debugging purpose. -- cgit v1.2.3 From dc70ec217cec504e6f8fee8fd91bf5c118af05f2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 21 Nov 2021 12:54:40 +0000 Subject: KVM: Introduce CONFIG_HAVE_KVM_DIRTY_RING I'd like to make the build include dirty_ring.c based on whether the arch wants it or not. That's a whole lot simpler if there's a config symbol instead of doing it implicitly on KVM_DIRTY_LOG_PAGE_OFFSET being set to something non-zero. Signed-off-by: David Woodhouse Message-Id: <20211121125451.9489-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_dirty_ring.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 120e5e90fa1d..4da8d4a4140b 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -27,9 +27,9 @@ struct kvm_dirty_ring { int index; }; -#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0) +#ifndef CONFIG_HAVE_KVM_DIRTY_RING /* - * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should + * If CONFIG_HAVE_HVM_DIRTY_RING not defined, kvm_dirty_ring.o should * not be included as well, so define these nop functions for the arch. */ static inline u32 kvm_dirty_ring_get_rsvd_entries(void) @@ -74,7 +74,7 @@ static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) return true; } -#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ +#else /* CONFIG_HAVE_KVM_DIRTY_RING */ u32 kvm_dirty_ring_get_rsvd_entries(void); int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); @@ -98,6 +98,6 @@ struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset); void kvm_dirty_ring_free(struct kvm_dirty_ring *ring); bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring); -#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ +#endif /* CONFIG_HAVE_KVM_DIRTY_RING */ #endif /* KVM_DIRTY_RING_H */ -- cgit v1.2.3 From abaad3d95b5117a17886d37cf0228712801cd259 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 6 Dec 2021 12:26:17 -0500 Subject: drm/vmwgfx: Allow checking for gl43 contexts To make sure we're running on top of hardware that can support GL4.3 we need to add a way of querying for those capabilities. DRM_VMW_PARAM_GL43 allows userspace to check for presence of GL4.3 capable contexts. Signed-off-by: Zack Rusin Reviewed-by: Charmaine Lee Link: https://patchwork.freedesktop.org/patch/msgid/20211206172620.3139754-10-zack@kde.org --- include/uapi/drm/vmwgfx_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index 9078775feb51..8277644c1144 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -110,6 +110,7 @@ extern "C" { #define DRM_VMW_PARAM_HW_CAPS2 13 #define DRM_VMW_PARAM_SM4_1 14 #define DRM_VMW_PARAM_SM5 15 +#define DRM_VMW_PARAM_GL43 16 /** * enum drm_vmw_handle_type - handle type for ref ioctls -- cgit v1.2.3 From 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Dec 2021 17:04:51 -0800 Subject: wait: add wake_up_pollfree() Several ->poll() implementations are special in that they use a waitqueue whose lifetime is the current task, rather than the struct file as is normally the case. This is okay for blocking polls, since a blocking poll occurs within one task; however, non-blocking polls require another solution. This solution is for the queue to be cleared before it is freed, using 'wake_up_poll(wq, EPOLLHUP | POLLFREE);'. However, that has a bug: wake_up_poll() calls __wake_up() with nr_exclusive=1. Therefore, if there are multiple "exclusive" waiters, and the wakeup function for the first one returns a positive value, only that one will be called. That's *not* what's needed for POLLFREE; POLLFREE is special in that it really needs to wake up everyone. Considering the three non-blocking poll systems: - io_uring poll doesn't handle POLLFREE at all, so it is broken anyway. - aio poll is unaffected, since it doesn't support exclusive waits. However, that's fragile, as someone could add this feature later. - epoll doesn't appear to be broken by this, since its wakeup function returns 0 when it sees POLLFREE. But this is fragile. Although there is a workaround (see epoll), it's better to define a function which always sends POLLFREE to all waiters. Add such a function. Also make it verify that the queue really becomes empty after all waiters have been woken up. Reported-by: Linus Torvalds Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211209010455.42744-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/wait.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 2d0df57c9902..851e07da2583 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -217,6 +217,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); +void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -245,6 +246,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); #define wake_up_interruptible_sync_poll_locked(x, m) \ __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) +/** + * wake_up_pollfree - signal that a polled waitqueue is going away + * @wq_head: the wait queue head + * + * In the very rare cases where a ->poll() implementation uses a waitqueue whose + * lifetime is tied to a task rather than to the 'struct file' being polled, + * this function must be called before the waitqueue is freed so that + * non-blocking polls (e.g. epoll) are notified that the queue is going away. + * + * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via + * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. + */ +static inline void wake_up_pollfree(struct wait_queue_head *wq_head) +{ + /* + * For performance reasons, we don't always take the queue lock here. + * Therefore, we might race with someone removing the last entry from + * the queue, and proceed while they still hold the queue lock. + * However, rcu_read_lock() is required to be held in such cases, so we + * can safely proceed with an RCU-delayed free. + */ + if (waitqueue_active(wq_head)) + __wake_up_pollfree(wq_head); +} + #define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ -- cgit v1.2.3 From 50252e4b5e989ce64555c7aef7516bdefc2fea72 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Dec 2021 17:04:55 -0800 Subject: aio: fix use-after-free due to missing POLLFREE handling signalfd_poll() and binder_poll() are special in that they use a waitqueue whose lifetime is the current task, rather than the struct file as is normally the case. This is okay for blocking polls, since a blocking poll occurs within one task; however, non-blocking polls require another solution. This solution is for the queue to be cleared before it is freed, by sending a POLLFREE notification to all waiters. Unfortunately, only eventpoll handles POLLFREE. A second type of non-blocking poll, aio poll, was added in kernel v4.18, and it doesn't handle POLLFREE. This allows a use-after-free to occur if a signalfd or binder fd is polled with aio poll, and the waitqueue gets freed. Fix this by making aio poll handle POLLFREE. A patch by Ramji Jiyani (https://lore.kernel.org/r/20211027011834.2497484-1-ramjiyani@google.com) tried to do this by making aio_poll_wake() always complete the request inline if POLLFREE is seen. However, that solution had two bugs. First, it introduced a deadlock, as it unconditionally locked the aio context while holding the waitqueue lock, which inverts the normal locking order. Second, it didn't consider that POLLFREE notifications are missed while the request has been temporarily de-queued. The second problem was solved by my previous patch. This patch then properly fixes the use-after-free by handling POLLFREE in a deadlock-free way. It does this by taking advantage of the fact that freeing of the waitqueue is RCU-delayed, similar to what eventpoll does. Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL") Cc: # v4.18+ Link: https://lore.kernel.org/r/20211209010455.42744-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/uapi/asm-generic/poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h index 41b509f410bf..f9c520ce4bf4 100644 --- a/include/uapi/asm-generic/poll.h +++ b/include/uapi/asm-generic/poll.h @@ -29,7 +29,7 @@ #define POLLRDHUP 0x2000 #endif -#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ +#define POLLFREE (__force __poll_t)0x4000 #define POLL_BUSY_LOOP (__force __poll_t)0x8000 -- cgit v1.2.3 From 6abfaaf124a81b7d2ab132cc2c9885baa14171e5 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:45 +0200 Subject: fs_parse: allow parameter value to be empty Allow parameter value to be empty by specifying fs_param_can_be_empty flag. Signed-off-by: Lukas Czerner Cc: Al Viro Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-2-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- include/linux/fs_parser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index aab0ffc6bac6..f103c91139d4 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -42,7 +42,7 @@ struct fs_parameter_spec { u8 opt; /* Option number (returned by fs_parse()) */ unsigned short flags; #define fs_param_neg_with_no 0x0002 /* "noxxx" is negative param */ -#define fs_param_neg_with_empty 0x0004 /* "xxx=" is negative param */ +#define fs_param_can_be_empty 0x0004 /* "xxx=" is allowed */ #define fs_param_deprecated 0x0008 /* The param is deprecated */ const void *data; }; -- cgit v1.2.3 From 3e5b1feccea7db576353ffc302f78d522e4116e6 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 9 Dec 2021 13:11:32 +0000 Subject: net: phylink: add legacy_pre_march2020 indicator Add a boolean to phylink_config to indicate whether a driver has not been updated for the changes in commit 7cceb599d15d ("net: phylink: avoid mac_config calls"), and thus are reliant on the old behaviour. We were currently keying the phylink behaviour on the presence of a PCS, but this is sub-optimal for modern drivers that may not have a PCS. This commit merely introduces the new flag, but does not add any use, since we need all legacy drivers to set this flag before it can be used. Once these legacy drivers have been updated, we can remove this flag. Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 01224235df0f..d005b8e36048 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -84,6 +84,8 @@ enum phylink_op_type { * struct phylink_config - PHYLINK configuration structure * @dev: a pointer to a struct device associated with the MAC * @type: operation type of PHYLINK instance + * @legacy_pre_march2020: driver has not been updated for March 2020 updates + * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls") * @pcs_poll: MAC PCS cannot provide link change interrupt * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. @@ -97,6 +99,7 @@ enum phylink_op_type { struct phylink_config { struct device *dev; enum phylink_op_type type; + bool legacy_pre_march2020; bool pcs_poll; bool poll_fixed_state; bool ovr_an_inband; -- cgit v1.2.3 From 001f4261fe4d5ae710cf1f445b6cae6d9d3ae26e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 9 Dec 2021 13:11:48 +0000 Subject: net: phylink: use legacy_pre_march2020 Use the legacy flag to indicate whether we should operate in legacy mode. This allows us to stop using the presence of a PCS as an indicator to the age of the phylink user, and make PCS presence optional. Legacy mode involves: 1) calling mac_config() whenever the link comes up 2) calling mac_config() whenever the inband advertisement changes, possibly followed by a call to mac_an_restart() 3) making use of mac_an_restart() 4) making use of mac_pcs_get_state() All the above functionality was moved to a seperate "PCS" block of operations in March 2020. Update the documents to indicate that the differences that this flag makes. Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index d005b8e36048..a2f266cc3442 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -190,6 +190,10 @@ void validate(struct phylink_config *config, unsigned long *supported, * negotiation completion state in @state->an_complete, and link up state * in @state->link. If possible, @state->lp_advertising should also be * populated. + * + * Note: This is a legacy method. This function will not be called unless + * legacy_pre_march2020 is set in &struct phylink_config and there is no + * PCS attached. */ void mac_pcs_get_state(struct phylink_config *config, struct phylink_link_state *state); @@ -230,6 +234,15 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * guaranteed to be correct, and so any mac_config() implementation must * never reference these fields. * + * Note: For legacy March 2020 drivers (drivers with legacy_pre_march2020 set + * in their &phylnk_config and which don't have a PCS), this function will be + * called on each link up event, and to also change the in-band advert. For + * non-legacy drivers, it will only be called to reconfigure the MAC for a + * "major" change in e.g. interface mode. It will not be called for changes + * in speed, duplex or pause modes or to change the in-band advertisement. + * In any case, it is strongly preferred that speed, duplex and pause settings + * are handled in the mac_link_up() method and not in this method. + * * (this requires a rewrite - please refer to mac_link_up() for situations * where the PCS and MAC are not tightly integrated.) * @@ -314,6 +327,10 @@ int mac_finish(struct phylink_config *config, unsigned int mode, /** * mac_an_restart() - restart 802.3z BaseX autonegotiation * @config: a pointer to a &struct phylink_config. + * + * Note: This is a legacy method. This function will not be called unless + * legacy_pre_march2020 is set in &struct phylink_config and there is no + * PCS attached. */ void mac_an_restart(struct phylink_config *config); -- cgit v1.2.3 From 1a2fb220edca98d18f90e3ef5bd6853a6b22b1b8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 6 Dec 2021 22:27:58 -0800 Subject: skbuff: Extract list pointers to silence compiler warnings Under both -Warray-bounds and the object_size sanitizer, the compiler is upset about accessing prev/next of sk_buff when the object it thinks it is coming from is sk_buff_head. The warning is a false positive due to the compiler taking a conservative approach, opting to warn at casting time rather than access time. However, in support of enabling -Warray-bounds globally (which has found many real bugs), arrange things for sk_buff so that the compiler can unambiguously see that there is no intention to access anything except prev/next. Introduce and cast to a separate struct sk_buff_list, which contains _only_ the first two fields, silencing the warnings: In file included from ./include/net/net_namespace.h:39, from ./include/linux/netdevice.h:37, from net/core/netpoll.c:17: net/core/netpoll.c: In function 'refill_skbs': ./include/linux/skbuff.h:2086:9: warning: array subscript 'struct sk_buff[0]' is partly outside array bounds of 'struct sk_buff_head[1]' [-Warray-bounds] 2086 | __skb_insert(newsk, next->prev, next, list); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/core/netpoll.c:49:28: note: while referencing 'skb_pool' 49 | static struct sk_buff_head skb_pool; | ^~~~~~~~ This change results in no executable instruction differences. Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20211207062758.2324338-1-keescook@chromium.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dd262bd8ddbe..6535294f6a48 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -292,9 +292,11 @@ struct tc_skb_ext { #endif struct sk_buff_head { - /* These two members must be first. */ - struct sk_buff *next; - struct sk_buff *prev; + /* These two members must be first to match sk_buff. */ + struct_group_tagged(sk_buff_list, list, + struct sk_buff *next; + struct sk_buff *prev; + ); __u32 qlen; spinlock_t lock; @@ -730,7 +732,7 @@ typedef unsigned char *sk_buff_data_t; struct sk_buff { union { struct { - /* These two members must be first. */ + /* These two members must be first to match sk_buff_head. */ struct sk_buff *next; struct sk_buff *prev; @@ -1976,8 +1978,8 @@ static inline void __skb_insert(struct sk_buff *newsk, */ WRITE_ONCE(newsk->next, next); WRITE_ONCE(newsk->prev, prev); - WRITE_ONCE(next->prev, newsk); - WRITE_ONCE(prev->next, newsk); + WRITE_ONCE(((struct sk_buff_list *)next)->prev, newsk); + WRITE_ONCE(((struct sk_buff_list *)prev)->next, newsk); WRITE_ONCE(list->qlen, list->qlen + 1); } @@ -2073,7 +2075,7 @@ static inline void __skb_queue_after(struct sk_buff_head *list, struct sk_buff *prev, struct sk_buff *newsk) { - __skb_insert(newsk, prev, prev->next, list); + __skb_insert(newsk, prev, ((struct sk_buff_list *)prev)->next, list); } void skb_append(struct sk_buff *old, struct sk_buff *newsk, @@ -2083,7 +2085,7 @@ static inline void __skb_queue_before(struct sk_buff_head *list, struct sk_buff *next, struct sk_buff *newsk) { - __skb_insert(newsk, next->prev, next, list); + __skb_insert(newsk, ((struct sk_buff_list *)next)->prev, next, list); } /** -- cgit v1.2.3 From a4f1192cb53758a7210ed5a9ee695aeba22f75fb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 9 Dec 2021 14:30:33 +0200 Subject: percpu_ref: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Signed-off-by: Dennis Zhou --- include/linux/percpu-refcount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index b31d3f3312ce..d73a1c08c3e3 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -51,9 +51,9 @@ #define _LINUX_PERCPU_REFCOUNT_H #include -#include #include #include +#include #include struct percpu_ref; -- cgit v1.2.3 From 9756f64c8f2d19c0029a5827bda8ac275302ec22 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 30 Nov 2021 12:44:11 +0100 Subject: kcsan: Avoid checking scoped accesses from nested contexts Avoid checking scoped accesses from nested contexts (such as nested interrupts or in scheduler code) which share the same kcsan_ctx. This is to avoid detecting false positive races of accesses in the same thread with currently scoped accesses: consider setting up a watchpoint for a non-scoped (normal) access that also "conflicts" with a current scoped access. In a nested interrupt (or in the scheduler), which shares the same kcsan_ctx, we cannot check scoped accesses set up in the parent context -- simply ignore them in this case. With the introduction of kcsan_ctx::disable_scoped, we can also clean up kcsan_check_scoped_accesses()'s recursion guard, and do not need to modify the list's prev pointer. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index fc266ecb2a4d..13cef3458fed 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -21,6 +21,7 @@ */ struct kcsan_ctx { int disable_count; /* disable counter */ + int disable_scoped; /* disable scoped access counter */ int atomic_next; /* number of following atomic ops */ /* -- cgit v1.2.3 From 69562e4983d93e2791c0bf128b07462afbd7f4dc Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 5 Aug 2021 14:57:45 +0200 Subject: kcsan: Add core support for a subset of weak memory modeling Add support for modeling a subset of weak memory, which will enable detection of a subset of data races due to missing memory barriers. KCSAN's approach to detecting missing memory barriers is based on modeling access reordering, and enabled if `CONFIG_KCSAN_WEAK_MEMORY=y`, which depends on `CONFIG_KCSAN_STRICT=y`. The feature can be enabled or disabled at boot and runtime via the `kcsan.weak_memory` boot parameter. Each memory access for which a watchpoint is set up, is also selected for simulated reordering within the scope of its function (at most 1 in-flight access). We are limited to modeling the effects of "buffering" (delaying the access), since the runtime cannot "prefetch" accesses (therefore no acquire modeling). Once an access has been selected for reordering, it is checked along every other access until the end of the function scope. If an appropriate memory barrier is encountered, the access will no longer be considered for reordering. When the result of a memory operation should be ordered by a barrier, KCSAN can then detect data races where the conflict only occurs as a result of a missing barrier due to reordering accesses. Suggested-by: Dmitry Vyukov Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 10 +++++++++- include/linux/kcsan.h | 10 +++++++++- include/linux/sched.h | 3 +++ 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 5f5965246877..a1c6a89fde71 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -99,7 +99,15 @@ void kcsan_set_access_mask(unsigned long mask); /* Scoped access information. */ struct kcsan_scoped_access { - struct list_head list; + union { + struct list_head list; /* scoped_accesses list */ + /* + * Not an entry in scoped_accesses list; stack depth from where + * the access was initialized. + */ + int stack_depth; + }; + /* Access information. */ const volatile void *ptr; size_t size; diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 13cef3458fed..c07c71f5ba4f 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -49,8 +49,16 @@ struct kcsan_ctx { */ unsigned long access_mask; - /* List of scoped accesses. */ + /* List of scoped accesses; likely to be empty. */ struct list_head scoped_accesses; + +#ifdef CONFIG_KCSAN_WEAK_MEMORY + /* + * Scoped access for modeling access reordering to detect missing memory + * barriers; only keep 1 to keep fast-path complexity manageable. + */ + struct kcsan_scoped_access reorder_access; +#endif }; /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 78c351e35fec..0cd40b010487 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1339,6 +1339,9 @@ struct task_struct { #ifdef CONFIG_TRACE_IRQFLAGS struct irqtrace_events kcsan_save_irqtrace; #endif +#ifdef CONFIG_KCSAN_WEAK_MEMORY + int kcsan_stack_depth; +#endif #endif #if IS_ENABLED(CONFIG_KUNIT) -- cgit v1.2.3 From 0b8b0830ac1419d7250fde31ea78793a03f3db44 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 30 Nov 2021 12:44:13 +0100 Subject: kcsan: Add core memory barrier instrumentation functions Add the core memory barrier instrumentation functions. These invalidate the current in-flight reordered access based on the rules for the respective barrier types and in-flight access type. To obtain barrier instrumentation that can be disabled via __no_kcsan with appropriate compiler-support (and not just with objtool help), barrier instrumentation repurposes __atomic_signal_fence(), instead of inserting explicit calls. Crucially, __atomic_signal_fence() normally does not map to any real instructions, but is still intercepted by fsanitize=thread. As a result, like any other instrumentation done by the compiler, barrier instrumentation can be disabled with __no_kcsan. Unfortunately Clang and GCC currently differ in their __no_kcsan aka __no_sanitize_thread behaviour with respect to builtin atomics (and __tsan_func_{entry,exit}) instrumentation. This is already reflected in Kconfig.kcsan's dependencies for KCSAN_WEAK_MEMORY. A later change will introduce support for newer versions of Clang that can implement __no_kcsan to also remove the additional instrumentation introduced by KCSAN_WEAK_MEMORY. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 71 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index a1c6a89fde71..9d2c869167f2 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -36,6 +36,36 @@ */ void __kcsan_check_access(const volatile void *ptr, size_t size, int type); +/* + * See definition of __tsan_atomic_signal_fence() in kernel/kcsan/core.c. + * Note: The mappings are arbitrary, and do not reflect any real mappings of C11 + * memory orders to the LKMM memory orders and vice-versa! + */ +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_mb __ATOMIC_SEQ_CST +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_wmb __ATOMIC_ACQ_REL +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_rmb __ATOMIC_ACQUIRE +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_release __ATOMIC_RELEASE + +/** + * __kcsan_mb - full memory barrier instrumentation + */ +void __kcsan_mb(void); + +/** + * __kcsan_wmb - write memory barrier instrumentation + */ +void __kcsan_wmb(void); + +/** + * __kcsan_rmb - read memory barrier instrumentation + */ +void __kcsan_rmb(void); + +/** + * __kcsan_release - release barrier instrumentation + */ +void __kcsan_release(void); + /** * kcsan_disable_current - disable KCSAN for the current context * @@ -159,6 +189,10 @@ void kcsan_end_scoped_access(struct kcsan_scoped_access *sa); static inline void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { } +static inline void __kcsan_mb(void) { } +static inline void __kcsan_wmb(void) { } +static inline void __kcsan_rmb(void) { } +static inline void __kcsan_release(void) { } static inline void kcsan_disable_current(void) { } static inline void kcsan_enable_current(void) { } static inline void kcsan_enable_current_nowarn(void) { } @@ -191,12 +225,45 @@ static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { } */ #define __kcsan_disable_current kcsan_disable_current #define __kcsan_enable_current kcsan_enable_current_nowarn -#else +#else /* __SANITIZE_THREAD__ */ static inline void kcsan_check_access(const volatile void *ptr, size_t size, int type) { } static inline void __kcsan_enable_current(void) { } static inline void __kcsan_disable_current(void) { } -#endif +#endif /* __SANITIZE_THREAD__ */ + +#if defined(CONFIG_KCSAN_WEAK_MEMORY) && defined(__SANITIZE_THREAD__) +/* + * Normal barrier instrumentation is not done via explicit calls, but by mapping + * to a repurposed __atomic_signal_fence(), which normally does not generate any + * real instructions, but is still intercepted by fsanitize=thread. This means, + * like any other compile-time instrumentation, barrier instrumentation can be + * disabled with the __no_kcsan function attribute. + * + * Also see definition of __tsan_atomic_signal_fence() in kernel/kcsan/core.c. + */ +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE(name) \ + static __always_inline void kcsan_##name(void) \ + { \ + barrier(); \ + __atomic_signal_fence(__KCSAN_BARRIER_TO_SIGNAL_FENCE_##name); \ + barrier(); \ + } +__KCSAN_BARRIER_TO_SIGNAL_FENCE(mb) +__KCSAN_BARRIER_TO_SIGNAL_FENCE(wmb) +__KCSAN_BARRIER_TO_SIGNAL_FENCE(rmb) +__KCSAN_BARRIER_TO_SIGNAL_FENCE(release) +#elif defined(CONFIG_KCSAN_WEAK_MEMORY) && defined(__KCSAN_INSTRUMENT_BARRIERS__) +#define kcsan_mb __kcsan_mb +#define kcsan_wmb __kcsan_wmb +#define kcsan_rmb __kcsan_rmb +#define kcsan_release __kcsan_release +#else /* CONFIG_KCSAN_WEAK_MEMORY && ... */ +static inline void kcsan_mb(void) { } +static inline void kcsan_wmb(void) { } +static inline void kcsan_rmb(void) { } +static inline void kcsan_release(void) { } +#endif /* CONFIG_KCSAN_WEAK_MEMORY && ... */ /** * __kcsan_check_read - check regular read access for races -- cgit v1.2.3 From f948666de517cf8ebef7cb2c9b2d669dec4bfe2e Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 30 Nov 2021 12:44:22 +0100 Subject: locking/barriers, kcsan: Add instrumentation for barriers Adds the required KCSAN instrumentation for barriers if CONFIG_SMP. KCSAN supports modeling the effects of: smp_mb() smp_rmb() smp_wmb() smp_store_release() Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/asm-generic/barrier.h | 29 +++++++++++++++-------------- include/linux/spinlock.h | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 640f09479bdf..27a9c9edfef6 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -14,6 +14,7 @@ #ifndef __ASSEMBLY__ #include +#include #include #ifndef nop @@ -62,15 +63,15 @@ #ifdef CONFIG_SMP #ifndef smp_mb -#define smp_mb() __smp_mb() +#define smp_mb() do { kcsan_mb(); __smp_mb(); } while (0) #endif #ifndef smp_rmb -#define smp_rmb() __smp_rmb() +#define smp_rmb() do { kcsan_rmb(); __smp_rmb(); } while (0) #endif #ifndef smp_wmb -#define smp_wmb() __smp_wmb() +#define smp_wmb() do { kcsan_wmb(); __smp_wmb(); } while (0) #endif #else /* !CONFIG_SMP */ @@ -123,19 +124,19 @@ do { \ #ifdef CONFIG_SMP #ifndef smp_store_mb -#define smp_store_mb(var, value) __smp_store_mb(var, value) +#define smp_store_mb(var, value) do { kcsan_mb(); __smp_store_mb(var, value); } while (0) #endif #ifndef smp_mb__before_atomic -#define smp_mb__before_atomic() __smp_mb__before_atomic() +#define smp_mb__before_atomic() do { kcsan_mb(); __smp_mb__before_atomic(); } while (0) #endif #ifndef smp_mb__after_atomic -#define smp_mb__after_atomic() __smp_mb__after_atomic() +#define smp_mb__after_atomic() do { kcsan_mb(); __smp_mb__after_atomic(); } while (0) #endif #ifndef smp_store_release -#define smp_store_release(p, v) __smp_store_release(p, v) +#define smp_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0) #endif #ifndef smp_load_acquire @@ -178,13 +179,13 @@ do { \ #endif /* CONFIG_SMP */ /* Barriers for virtual machine guests when talking to an SMP host */ -#define virt_mb() __smp_mb() -#define virt_rmb() __smp_rmb() -#define virt_wmb() __smp_wmb() -#define virt_store_mb(var, value) __smp_store_mb(var, value) -#define virt_mb__before_atomic() __smp_mb__before_atomic() -#define virt_mb__after_atomic() __smp_mb__after_atomic() -#define virt_store_release(p, v) __smp_store_release(p, v) +#define virt_mb() do { kcsan_mb(); __smp_mb(); } while (0) +#define virt_rmb() do { kcsan_rmb(); __smp_rmb(); } while (0) +#define virt_wmb() do { kcsan_wmb(); __smp_wmb(); } while (0) +#define virt_store_mb(var, value) do { kcsan_mb(); __smp_store_mb(var, value); } while (0) +#define virt_mb__before_atomic() do { kcsan_mb(); __smp_mb__before_atomic(); } while (0) +#define virt_mb__after_atomic() do { kcsan_mb(); __smp_mb__after_atomic(); } while (0) +#define virt_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0) #define virt_load_acquire(p) __smp_load_acquire(p) /** diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index b4e5ca23f840..5c0c5174155d 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -171,7 +171,7 @@ do { \ * Architectures that can implement ACQUIRE better need to take care. */ #ifndef smp_mb__after_spinlock -#define smp_mb__after_spinlock() do { } while (0) +#define smp_mb__after_spinlock() kcsan_mb() #endif #ifdef CONFIG_DEBUG_SPINLOCK -- cgit v1.2.3 From 2505a51ac6f249956735e0a369e2404f96eebef0 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 30 Nov 2021 12:44:23 +0100 Subject: locking/barriers, kcsan: Support generic instrumentation Thus far only smp_*() barriers had been defined by asm-generic/barrier.h based on __smp_*() barriers, because the !SMP case is usually generic. With the introduction of instrumentation, it also makes sense to have asm-generic/barrier.h assist in the definition of instrumented versions of mb(), rmb(), wmb(), dma_rmb(), and dma_wmb(). Because there is no requirement to distinguish the !SMP case, the definition can be simpler: we can avoid also providing fallbacks for the __ prefixed cases, and only check if `defined(__)`, to finally define the KCSAN-instrumented versions. This also allows for the compiler to complain if an architecture accidentally defines both the normal and __ prefixed variant. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/asm-generic/barrier.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 27a9c9edfef6..02c4339c8eeb 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -21,6 +21,31 @@ #define nop() asm volatile ("nop") #endif +/* + * Architectures that want generic instrumentation can define __ prefixed + * variants of all barriers. + */ + +#ifdef __mb +#define mb() do { kcsan_mb(); __mb(); } while (0) +#endif + +#ifdef __rmb +#define rmb() do { kcsan_rmb(); __rmb(); } while (0) +#endif + +#ifdef __wmb +#define wmb() do { kcsan_wmb(); __wmb(); } while (0) +#endif + +#ifdef __dma_rmb +#define dma_rmb() do { kcsan_rmb(); __dma_rmb(); } while (0) +#endif + +#ifdef __dma_wmb +#define dma_wmb() do { kcsan_wmb(); __dma_wmb(); } while (0) +#endif + /* * Force strict CPU ordering. And yes, this is required on UP too when we're * talking to devices. -- cgit v1.2.3 From e87c4f6642f49627c3430cb3ee78c73fb51b48e4 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 30 Nov 2021 12:44:24 +0100 Subject: locking/atomics, kcsan: Add instrumentation for barriers Adds the required KCSAN instrumentation for barriers of atomics. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/atomic/atomic-instrumented.h | 135 ++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index a0f654370da3..5d69b143c28e 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -45,6 +45,7 @@ atomic_set(atomic_t *v, int i) static __always_inline void atomic_set_release(atomic_t *v, int i) { + kcsan_release(); instrument_atomic_write(v, sizeof(*v)); arch_atomic_set_release(v, i); } @@ -59,6 +60,7 @@ atomic_add(int i, atomic_t *v) static __always_inline int atomic_add_return(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_return(i, v); } @@ -73,6 +75,7 @@ atomic_add_return_acquire(int i, atomic_t *v) static __always_inline int atomic_add_return_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_return_release(i, v); } @@ -87,6 +90,7 @@ atomic_add_return_relaxed(int i, atomic_t *v) static __always_inline int atomic_fetch_add(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add(i, v); } @@ -101,6 +105,7 @@ atomic_fetch_add_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_add_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add_release(i, v); } @@ -122,6 +127,7 @@ atomic_sub(int i, atomic_t *v) static __always_inline int atomic_sub_return(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_return(i, v); } @@ -136,6 +142,7 @@ atomic_sub_return_acquire(int i, atomic_t *v) static __always_inline int atomic_sub_return_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_return_release(i, v); } @@ -150,6 +157,7 @@ atomic_sub_return_relaxed(int i, atomic_t *v) static __always_inline int atomic_fetch_sub(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_sub(i, v); } @@ -164,6 +172,7 @@ atomic_fetch_sub_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_sub_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_sub_release(i, v); } @@ -185,6 +194,7 @@ atomic_inc(atomic_t *v) static __always_inline int atomic_inc_return(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_return(v); } @@ -199,6 +209,7 @@ atomic_inc_return_acquire(atomic_t *v) static __always_inline int atomic_inc_return_release(atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_return_release(v); } @@ -213,6 +224,7 @@ atomic_inc_return_relaxed(atomic_t *v) static __always_inline int atomic_fetch_inc(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_inc(v); } @@ -227,6 +239,7 @@ atomic_fetch_inc_acquire(atomic_t *v) static __always_inline int atomic_fetch_inc_release(atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_inc_release(v); } @@ -248,6 +261,7 @@ atomic_dec(atomic_t *v) static __always_inline int atomic_dec_return(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_return(v); } @@ -262,6 +276,7 @@ atomic_dec_return_acquire(atomic_t *v) static __always_inline int atomic_dec_return_release(atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_return_release(v); } @@ -276,6 +291,7 @@ atomic_dec_return_relaxed(atomic_t *v) static __always_inline int atomic_fetch_dec(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_dec(v); } @@ -290,6 +306,7 @@ atomic_fetch_dec_acquire(atomic_t *v) static __always_inline int atomic_fetch_dec_release(atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_dec_release(v); } @@ -311,6 +328,7 @@ atomic_and(int i, atomic_t *v) static __always_inline int atomic_fetch_and(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_and(i, v); } @@ -325,6 +343,7 @@ atomic_fetch_and_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_and_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_and_release(i, v); } @@ -346,6 +365,7 @@ atomic_andnot(int i, atomic_t *v) static __always_inline int atomic_fetch_andnot(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_andnot(i, v); } @@ -360,6 +380,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_andnot_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_andnot_release(i, v); } @@ -381,6 +402,7 @@ atomic_or(int i, atomic_t *v) static __always_inline int atomic_fetch_or(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_or(i, v); } @@ -395,6 +417,7 @@ atomic_fetch_or_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_or_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_or_release(i, v); } @@ -416,6 +439,7 @@ atomic_xor(int i, atomic_t *v) static __always_inline int atomic_fetch_xor(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_xor(i, v); } @@ -430,6 +454,7 @@ atomic_fetch_xor_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_xor_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_xor_release(i, v); } @@ -444,6 +469,7 @@ atomic_fetch_xor_relaxed(int i, atomic_t *v) static __always_inline int atomic_xchg(atomic_t *v, int i) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_xchg(v, i); } @@ -458,6 +484,7 @@ atomic_xchg_acquire(atomic_t *v, int i) static __always_inline int atomic_xchg_release(atomic_t *v, int i) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_xchg_release(v, i); } @@ -472,6 +499,7 @@ atomic_xchg_relaxed(atomic_t *v, int i) static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_cmpxchg(v, old, new); } @@ -486,6 +514,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new) static __always_inline int atomic_cmpxchg_release(atomic_t *v, int old, int new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_cmpxchg_release(v, old, new); } @@ -500,6 +529,7 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_try_cmpxchg(v, old, new); @@ -516,6 +546,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) static __always_inline bool atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_try_cmpxchg_release(v, old, new); @@ -532,6 +563,7 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_and_test(i, v); } @@ -539,6 +571,7 @@ atomic_sub_and_test(int i, atomic_t *v) static __always_inline bool atomic_dec_and_test(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_and_test(v); } @@ -546,6 +579,7 @@ atomic_dec_and_test(atomic_t *v) static __always_inline bool atomic_inc_and_test(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_and_test(v); } @@ -553,6 +587,7 @@ atomic_inc_and_test(atomic_t *v) static __always_inline bool atomic_add_negative(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_negative(i, v); } @@ -560,6 +595,7 @@ atomic_add_negative(int i, atomic_t *v) static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add_unless(v, a, u); } @@ -567,6 +603,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u) static __always_inline bool atomic_add_unless(atomic_t *v, int a, int u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_unless(v, a, u); } @@ -574,6 +611,7 @@ atomic_add_unless(atomic_t *v, int a, int u) static __always_inline bool atomic_inc_not_zero(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_not_zero(v); } @@ -581,6 +619,7 @@ atomic_inc_not_zero(atomic_t *v) static __always_inline bool atomic_inc_unless_negative(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_unless_negative(v); } @@ -588,6 +627,7 @@ atomic_inc_unless_negative(atomic_t *v) static __always_inline bool atomic_dec_unless_positive(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_unless_positive(v); } @@ -595,6 +635,7 @@ atomic_dec_unless_positive(atomic_t *v) static __always_inline int atomic_dec_if_positive(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_if_positive(v); } @@ -623,6 +664,7 @@ atomic64_set(atomic64_t *v, s64 i) static __always_inline void atomic64_set_release(atomic64_t *v, s64 i) { + kcsan_release(); instrument_atomic_write(v, sizeof(*v)); arch_atomic64_set_release(v, i); } @@ -637,6 +679,7 @@ atomic64_add(s64 i, atomic64_t *v) static __always_inline s64 atomic64_add_return(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_return(i, v); } @@ -651,6 +694,7 @@ atomic64_add_return_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_add_return_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_return_release(i, v); } @@ -665,6 +709,7 @@ atomic64_add_return_relaxed(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add(i, v); } @@ -679,6 +724,7 @@ atomic64_fetch_add_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_add_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add_release(i, v); } @@ -700,6 +746,7 @@ atomic64_sub(s64 i, atomic64_t *v) static __always_inline s64 atomic64_sub_return(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_return(i, v); } @@ -714,6 +761,7 @@ atomic64_sub_return_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_sub_return_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_return_release(i, v); } @@ -728,6 +776,7 @@ atomic64_sub_return_relaxed(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_sub(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_sub(i, v); } @@ -742,6 +791,7 @@ atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_sub_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_sub_release(i, v); } @@ -763,6 +813,7 @@ atomic64_inc(atomic64_t *v) static __always_inline s64 atomic64_inc_return(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_return(v); } @@ -777,6 +828,7 @@ atomic64_inc_return_acquire(atomic64_t *v) static __always_inline s64 atomic64_inc_return_release(atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_return_release(v); } @@ -791,6 +843,7 @@ atomic64_inc_return_relaxed(atomic64_t *v) static __always_inline s64 atomic64_fetch_inc(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_inc(v); } @@ -805,6 +858,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v) static __always_inline s64 atomic64_fetch_inc_release(atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_inc_release(v); } @@ -826,6 +880,7 @@ atomic64_dec(atomic64_t *v) static __always_inline s64 atomic64_dec_return(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_return(v); } @@ -840,6 +895,7 @@ atomic64_dec_return_acquire(atomic64_t *v) static __always_inline s64 atomic64_dec_return_release(atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_return_release(v); } @@ -854,6 +910,7 @@ atomic64_dec_return_relaxed(atomic64_t *v) static __always_inline s64 atomic64_fetch_dec(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_dec(v); } @@ -868,6 +925,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v) static __always_inline s64 atomic64_fetch_dec_release(atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_dec_release(v); } @@ -889,6 +947,7 @@ atomic64_and(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_and(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_and(i, v); } @@ -903,6 +962,7 @@ atomic64_fetch_and_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_and_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_and_release(i, v); } @@ -924,6 +984,7 @@ atomic64_andnot(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_andnot(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_andnot(i, v); } @@ -938,6 +999,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_andnot_release(i, v); } @@ -959,6 +1021,7 @@ atomic64_or(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_or(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_or(i, v); } @@ -973,6 +1036,7 @@ atomic64_fetch_or_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_or_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_or_release(i, v); } @@ -994,6 +1058,7 @@ atomic64_xor(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_xor(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_xor(i, v); } @@ -1008,6 +1073,7 @@ atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_xor_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_xor_release(i, v); } @@ -1022,6 +1088,7 @@ atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v) static __always_inline s64 atomic64_xchg(atomic64_t *v, s64 i) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_xchg(v, i); } @@ -1036,6 +1103,7 @@ atomic64_xchg_acquire(atomic64_t *v, s64 i) static __always_inline s64 atomic64_xchg_release(atomic64_t *v, s64 i) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_xchg_release(v, i); } @@ -1050,6 +1118,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 i) static __always_inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_cmpxchg(v, old, new); } @@ -1064,6 +1133,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) static __always_inline s64 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_cmpxchg_release(v, old, new); } @@ -1078,6 +1148,7 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic64_try_cmpxchg(v, old, new); @@ -1094,6 +1165,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) static __always_inline bool atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic64_try_cmpxchg_release(v, old, new); @@ -1110,6 +1182,7 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_and_test(i, v); } @@ -1117,6 +1190,7 @@ atomic64_sub_and_test(s64 i, atomic64_t *v) static __always_inline bool atomic64_dec_and_test(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_and_test(v); } @@ -1124,6 +1198,7 @@ atomic64_dec_and_test(atomic64_t *v) static __always_inline bool atomic64_inc_and_test(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_and_test(v); } @@ -1131,6 +1206,7 @@ atomic64_inc_and_test(atomic64_t *v) static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_negative(i, v); } @@ -1138,6 +1214,7 @@ atomic64_add_negative(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add_unless(v, a, u); } @@ -1145,6 +1222,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_unless(v, a, u); } @@ -1152,6 +1230,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u) static __always_inline bool atomic64_inc_not_zero(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_not_zero(v); } @@ -1159,6 +1238,7 @@ atomic64_inc_not_zero(atomic64_t *v) static __always_inline bool atomic64_inc_unless_negative(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_unless_negative(v); } @@ -1166,6 +1246,7 @@ atomic64_inc_unless_negative(atomic64_t *v) static __always_inline bool atomic64_dec_unless_positive(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_unless_positive(v); } @@ -1173,6 +1254,7 @@ atomic64_dec_unless_positive(atomic64_t *v) static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_if_positive(v); } @@ -1201,6 +1283,7 @@ atomic_long_set(atomic_long_t *v, long i) static __always_inline void atomic_long_set_release(atomic_long_t *v, long i) { + kcsan_release(); instrument_atomic_write(v, sizeof(*v)); arch_atomic_long_set_release(v, i); } @@ -1215,6 +1298,7 @@ atomic_long_add(long i, atomic_long_t *v) static __always_inline long atomic_long_add_return(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_return(i, v); } @@ -1229,6 +1313,7 @@ atomic_long_add_return_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_add_return_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_return_release(i, v); } @@ -1243,6 +1328,7 @@ atomic_long_add_return_relaxed(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_add(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add(i, v); } @@ -1257,6 +1343,7 @@ atomic_long_fetch_add_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_add_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add_release(i, v); } @@ -1278,6 +1365,7 @@ atomic_long_sub(long i, atomic_long_t *v) static __always_inline long atomic_long_sub_return(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_return(i, v); } @@ -1292,6 +1380,7 @@ atomic_long_sub_return_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_sub_return_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_return_release(i, v); } @@ -1306,6 +1395,7 @@ atomic_long_sub_return_relaxed(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_sub(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_sub(i, v); } @@ -1320,6 +1410,7 @@ atomic_long_fetch_sub_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_sub_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_sub_release(i, v); } @@ -1341,6 +1432,7 @@ atomic_long_inc(atomic_long_t *v) static __always_inline long atomic_long_inc_return(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_return(v); } @@ -1355,6 +1447,7 @@ atomic_long_inc_return_acquire(atomic_long_t *v) static __always_inline long atomic_long_inc_return_release(atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_return_release(v); } @@ -1369,6 +1462,7 @@ atomic_long_inc_return_relaxed(atomic_long_t *v) static __always_inline long atomic_long_fetch_inc(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_inc(v); } @@ -1383,6 +1477,7 @@ atomic_long_fetch_inc_acquire(atomic_long_t *v) static __always_inline long atomic_long_fetch_inc_release(atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_inc_release(v); } @@ -1404,6 +1499,7 @@ atomic_long_dec(atomic_long_t *v) static __always_inline long atomic_long_dec_return(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_return(v); } @@ -1418,6 +1514,7 @@ atomic_long_dec_return_acquire(atomic_long_t *v) static __always_inline long atomic_long_dec_return_release(atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_return_release(v); } @@ -1432,6 +1529,7 @@ atomic_long_dec_return_relaxed(atomic_long_t *v) static __always_inline long atomic_long_fetch_dec(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_dec(v); } @@ -1446,6 +1544,7 @@ atomic_long_fetch_dec_acquire(atomic_long_t *v) static __always_inline long atomic_long_fetch_dec_release(atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_dec_release(v); } @@ -1467,6 +1566,7 @@ atomic_long_and(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_and(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_and(i, v); } @@ -1481,6 +1581,7 @@ atomic_long_fetch_and_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_and_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_and_release(i, v); } @@ -1502,6 +1603,7 @@ atomic_long_andnot(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_andnot(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_andnot(i, v); } @@ -1516,6 +1618,7 @@ atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_andnot_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_andnot_release(i, v); } @@ -1537,6 +1640,7 @@ atomic_long_or(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_or(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_or(i, v); } @@ -1551,6 +1655,7 @@ atomic_long_fetch_or_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_or_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_or_release(i, v); } @@ -1572,6 +1677,7 @@ atomic_long_xor(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_xor(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_xor(i, v); } @@ -1586,6 +1692,7 @@ atomic_long_fetch_xor_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_xor_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_xor_release(i, v); } @@ -1600,6 +1707,7 @@ atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v) static __always_inline long atomic_long_xchg(atomic_long_t *v, long i) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_xchg(v, i); } @@ -1614,6 +1722,7 @@ atomic_long_xchg_acquire(atomic_long_t *v, long i) static __always_inline long atomic_long_xchg_release(atomic_long_t *v, long i) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_xchg_release(v, i); } @@ -1628,6 +1737,7 @@ atomic_long_xchg_relaxed(atomic_long_t *v, long i) static __always_inline long atomic_long_cmpxchg(atomic_long_t *v, long old, long new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_cmpxchg(v, old, new); } @@ -1642,6 +1752,7 @@ atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) static __always_inline long atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_cmpxchg_release(v, old, new); } @@ -1656,6 +1767,7 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) static __always_inline bool atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_long_try_cmpxchg(v, old, new); @@ -1672,6 +1784,7 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) static __always_inline bool atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_long_try_cmpxchg_release(v, old, new); @@ -1688,6 +1801,7 @@ atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new) static __always_inline bool atomic_long_sub_and_test(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_and_test(i, v); } @@ -1695,6 +1809,7 @@ atomic_long_sub_and_test(long i, atomic_long_t *v) static __always_inline bool atomic_long_dec_and_test(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_and_test(v); } @@ -1702,6 +1817,7 @@ atomic_long_dec_and_test(atomic_long_t *v) static __always_inline bool atomic_long_inc_and_test(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_and_test(v); } @@ -1709,6 +1825,7 @@ atomic_long_inc_and_test(atomic_long_t *v) static __always_inline bool atomic_long_add_negative(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_negative(i, v); } @@ -1716,6 +1833,7 @@ atomic_long_add_negative(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add_unless(v, a, u); } @@ -1723,6 +1841,7 @@ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) static __always_inline bool atomic_long_add_unless(atomic_long_t *v, long a, long u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_unless(v, a, u); } @@ -1730,6 +1849,7 @@ atomic_long_add_unless(atomic_long_t *v, long a, long u) static __always_inline bool atomic_long_inc_not_zero(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_not_zero(v); } @@ -1737,6 +1857,7 @@ atomic_long_inc_not_zero(atomic_long_t *v) static __always_inline bool atomic_long_inc_unless_negative(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_unless_negative(v); } @@ -1744,6 +1865,7 @@ atomic_long_inc_unless_negative(atomic_long_t *v) static __always_inline bool atomic_long_dec_unless_positive(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_unless_positive(v); } @@ -1751,6 +1873,7 @@ atomic_long_dec_unless_positive(atomic_long_t *v) static __always_inline long atomic_long_dec_if_positive(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_if_positive(v); } @@ -1758,6 +1881,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define xchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg(__ai_ptr, __VA_ARGS__); \ }) @@ -1772,6 +1896,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define xchg_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_release(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg_release(__ai_ptr, __VA_ARGS__); \ }) @@ -1786,6 +1911,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) @@ -1800,6 +1926,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_release(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg_release(__ai_ptr, __VA_ARGS__); \ }) @@ -1814,6 +1941,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg64(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64(__ai_ptr, __VA_ARGS__); \ }) @@ -1828,6 +1956,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg64_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_release(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64_release(__ai_ptr, __VA_ARGS__); \ }) @@ -1843,6 +1972,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ arch_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ @@ -1861,6 +1991,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ + kcsan_release(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ arch_try_cmpxchg_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \ @@ -1892,6 +2023,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define sync_cmpxchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_sync_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) @@ -1899,6 +2031,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg_double(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \ arch_cmpxchg_double(__ai_ptr, __VA_ARGS__); \ }) @@ -1912,4 +2045,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) }) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 2a9553f0a9d5619f19151092df5cabbbf16ce835 +// 87c974b93032afd42143613434d1a7788fa598f9 -- cgit v1.2.3 From 04def1b9b4a3d27ef80e7fbf1b17f1897beb1ce4 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 30 Nov 2021 12:44:25 +0100 Subject: asm-generic/bitops, kcsan: Add instrumentation for barriers Adds the required KCSAN instrumentation for barriers of atomic bitops. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/asm-generic/bitops/instrumented-atomic.h | 3 +++ include/asm-generic/bitops/instrumented-lock.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/asm-generic/bitops/instrumented-atomic.h b/include/asm-generic/bitops/instrumented-atomic.h index 81915dcd4b4e..c90192b1c755 100644 --- a/include/asm-generic/bitops/instrumented-atomic.h +++ b/include/asm-generic/bitops/instrumented-atomic.h @@ -67,6 +67,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) */ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { + kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_set_bit(nr, addr); } @@ -80,6 +81,7 @@ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { + kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_clear_bit(nr, addr); } @@ -93,6 +95,7 @@ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { + kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_change_bit(nr, addr); } diff --git a/include/asm-generic/bitops/instrumented-lock.h b/include/asm-generic/bitops/instrumented-lock.h index 75ef606f7145..eb64bd4f11f3 100644 --- a/include/asm-generic/bitops/instrumented-lock.h +++ b/include/asm-generic/bitops/instrumented-lock.h @@ -22,6 +22,7 @@ */ static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) { + kcsan_release(); instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_clear_bit_unlock(nr, addr); } @@ -37,6 +38,7 @@ static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) */ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) { + kcsan_release(); instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___clear_bit_unlock(nr, addr); } @@ -71,6 +73,7 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) { + kcsan_release(); instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); return arch_clear_bit_unlock_is_negative_byte(nr, addr); } -- cgit v1.2.3 From a015b7085979b12e55f67f3b86be0321fff6be3f Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 30 Nov 2021 12:44:32 +0100 Subject: compiler_attributes.h: Add __disable_sanitizer_instrumentation The new attribute maps to __attribute__((disable_sanitizer_instrumentation)), which will be supported by Clang >= 14.0. Future support in GCC is also possible. This attribute disables compiler instrumentation for kernel sanitizer tools, making it easier to implement noinstr. It is different from the existing __no_sanitize* attributes, which may still allow certain types of instrumentation to prevent false positives. Signed-off-by: Alexander Potapenko Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/compiler_attributes.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index b9121afd8733..37e260020221 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -308,6 +308,24 @@ # define __compiletime_warning(msg) #endif +/* + * Optional: only supported since clang >= 14.0 + * + * clang: https://clang.llvm.org/docs/AttributeReference.html#disable-sanitizer-instrumentation + * + * disable_sanitizer_instrumentation is not always similar to + * no_sanitize(()): the latter may still let specific sanitizers + * insert code into functions to prevent false positives. Unlike that, + * disable_sanitizer_instrumentation prevents all kinds of instrumentation to + * functions with the attribute. + */ +#if __has_attribute(disable_sanitizer_instrumentation) +# define __disable_sanitizer_instrumentation \ + __attribute__((disable_sanitizer_instrumentation)) +#else +# define __disable_sanitizer_instrumentation +#endif + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute -- cgit v1.2.3 From bd3d5bd1a0ad386475ea7a3de8a91e7d8a600536 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 30 Nov 2021 12:44:33 +0100 Subject: kcsan: Support WEAK_MEMORY with Clang where no objtool support exists Clang and GCC behave a little differently when it comes to the __no_sanitize_thread attribute, which has valid reasons, and depending on context either one could be right. Traditionally, user space ThreadSanitizer [1] still expects instrumented builtin atomics (to avoid false positives) and __tsan_func_{entry,exit} (to generate meaningful stack traces), even if the function has the attribute no_sanitize("thread"). [1] https://clang.llvm.org/docs/ThreadSanitizer.html#attribute-no-sanitize-thread GCC doesn't follow the same policy (for better or worse), and removes all kinds of instrumentation if no_sanitize is added. Arguably, since this may be a problem for user space ThreadSanitizer, we expect this may change in future. Since KCSAN != ThreadSanitizer, the likelihood of false positives even without barrier instrumentation everywhere, is much lower by design. At least for Clang, however, to fully remove all sanitizer instrumentation, we must add the disable_sanitizer_instrumentation attribute, which is available since Clang 14.0. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/compiler_types.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 1d32f4c03c9e..3c1795fdb568 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -198,9 +198,20 @@ struct ftrace_likely_data { # define __no_kasan_or_inline __always_inline #endif -#define __no_kcsan __no_sanitize_thread #ifdef __SANITIZE_THREAD__ +/* + * Clang still emits instrumentation for __tsan_func_{entry,exit}() and builtin + * atomics even with __no_sanitize_thread (to avoid false positives in userspace + * ThreadSanitizer). The kernel's requirements are stricter and we really do not + * want any instrumentation with __no_kcsan. + * + * Therefore we add __disable_sanitizer_instrumentation where available to + * disable all instrumentation. See Kconfig.kcsan where this is mandatory. + */ +# define __no_kcsan __no_sanitize_thread __disable_sanitizer_instrumentation # define __no_sanitize_or_inline __no_kcsan notrace __maybe_unused +#else +# define __no_kcsan #endif #ifndef __no_sanitize_or_inline -- cgit v1.2.3 From 80d7476fa20a3cc83f76b3b02a7575891d1e7511 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Sat, 4 Dec 2021 13:57:03 +0100 Subject: kcsan: Turn barrier instrumentation into macros Some architectures use barriers in 'extern inline' functions, from which we should not refer to static inline functions. For example, building Alpha with gcc and W=1 shows: ./include/asm-generic/barrier.h:70:30: warning: 'kcsan_rmb' is static but used in inline function 'pmd_offset' which is not static 70 | #define smp_rmb() do { kcsan_rmb(); __smp_rmb(); } while (0) | ^~~~~~~~~ ./arch/alpha/include/asm/pgtable.h:293:9: note: in expansion of macro 'smp_rmb' 293 | smp_rmb(); /* see above */ | ^~~~~~~ Which seems to warn about 6.7.4#3 of the C standard: "An inline definition of a function with external linkage shall not contain a definition of a modifiable object with static or thread storage duration, and shall not contain a reference to an identifier with internal linkage." Fix it by turning barrier instrumentation into macros, which matches definitions in . Perhaps we can revert this change in future, when there are no more 'extern inline' users left. Link: https://lkml.kernel.org/r/202112041334.X44uWZXf-lkp@intel.com Reported-by: kernel test robot Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 9d2c869167f2..92f3843d9ebb 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -241,28 +241,30 @@ static inline void __kcsan_disable_current(void) { } * disabled with the __no_kcsan function attribute. * * Also see definition of __tsan_atomic_signal_fence() in kernel/kcsan/core.c. + * + * These are all macros, like , since some architectures use them + * in non-static inline functions. */ #define __KCSAN_BARRIER_TO_SIGNAL_FENCE(name) \ - static __always_inline void kcsan_##name(void) \ - { \ + do { \ barrier(); \ __atomic_signal_fence(__KCSAN_BARRIER_TO_SIGNAL_FENCE_##name); \ barrier(); \ - } -__KCSAN_BARRIER_TO_SIGNAL_FENCE(mb) -__KCSAN_BARRIER_TO_SIGNAL_FENCE(wmb) -__KCSAN_BARRIER_TO_SIGNAL_FENCE(rmb) -__KCSAN_BARRIER_TO_SIGNAL_FENCE(release) + } while (0) +#define kcsan_mb() __KCSAN_BARRIER_TO_SIGNAL_FENCE(mb) +#define kcsan_wmb() __KCSAN_BARRIER_TO_SIGNAL_FENCE(wmb) +#define kcsan_rmb() __KCSAN_BARRIER_TO_SIGNAL_FENCE(rmb) +#define kcsan_release() __KCSAN_BARRIER_TO_SIGNAL_FENCE(release) #elif defined(CONFIG_KCSAN_WEAK_MEMORY) && defined(__KCSAN_INSTRUMENT_BARRIERS__) #define kcsan_mb __kcsan_mb #define kcsan_wmb __kcsan_wmb #define kcsan_rmb __kcsan_rmb #define kcsan_release __kcsan_release #else /* CONFIG_KCSAN_WEAK_MEMORY && ... */ -static inline void kcsan_mb(void) { } -static inline void kcsan_wmb(void) { } -static inline void kcsan_rmb(void) { } -static inline void kcsan_release(void) { } +#define kcsan_mb() do { } while (0) +#define kcsan_wmb() do { } while (0) +#define kcsan_rmb() do { } while (0) +#define kcsan_release() do { } while (0) #endif /* CONFIG_KCSAN_WEAK_MEMORY && ... */ /** -- cgit v1.2.3 From db67097aa6f2587b44055f2e16db72a11e17faef Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 9 Dec 2021 21:31:56 -0700 Subject: pktdvd: stop using bdi congestion framework. The bdi congestion framework isn't widely used and should be deprecated. pktdvd makes use of it to track congestion, but this can be done entirely internally to pktdvd, so it doesn't need to use the framework. So introduce a "congested" flag. When waiting for bio_queue_size to drop, set this flag and a var_waitqueue() to wait for it. When bio_queue_size does drop and this flag is set, clear the flag and call wake_up_var(). We don't use a wait_var_event macro for the waiting as we need to set the flag and drop the spinlock before calling schedule() and while that is possible with __wait_var_event(), result is not easy to read. Reviewed-by: Christoph Hellwig Signed-off-by: NeilBrown Link: https://lore.kernel.org/r/163910843527.9928.857338663717630212@noble.neil.brown.name Signed-off-by: Jens Axboe --- include/linux/pktcdvd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 174601554b06..c391e694aa26 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -183,6 +183,8 @@ struct pktcdvd_device spinlock_t lock; /* Serialize access to bio_queue */ struct rb_root bio_queue; /* Work queue of bios we need to handle */ int bio_queue_size; /* Number of nodes in bio_queue */ + bool congested; /* Someone is waiting for bio_queue_size + * to drop. */ sector_t current_sector; /* Keep track of where the elevator is */ atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ /* needs to be run. */ -- cgit v1.2.3 From f95711242390d759f69fd67ad46b31491fe904d6 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 8 Dec 2021 17:43:53 +0000 Subject: EDAC: Add RDDR5 and LRDDR5 memory types Include Registered-DDR5 and Load-Reduced DDR5 in the list of memory types. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211208174356.1997855-2-yazen.ghannam@amd.com --- include/linux/edac.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/edac.h b/include/linux/edac.h index 4207d06996a4..e730b3468719 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -182,6 +182,8 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_LRDDR4: Load-Reduced DDR4 memory. * @MEM_LPDDR4: Low-Power DDR4 memory. * @MEM_DDR5: Unbuffered DDR5 RAM + * @MEM_RDDR5: Registered DDR5 RAM + * @MEM_LRDDR5: Load-Reduced DDR5 memory. * @MEM_NVDIMM: Non-volatile RAM * @MEM_WIO2: Wide I/O 2. * @MEM_HBM2: High bandwidth Memory Gen 2. @@ -211,6 +213,8 @@ enum mem_type { MEM_LRDDR4, MEM_LPDDR4, MEM_DDR5, + MEM_RDDR5, + MEM_LRDDR5, MEM_NVDIMM, MEM_WIO2, MEM_HBM2, @@ -239,6 +243,8 @@ enum mem_type { #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) #define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) #define MEM_FLAG_DDR5 BIT(MEM_DDR5) +#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) +#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) #define MEM_FLAG_WIO2 BIT(MEM_WIO2) #define MEM_FLAG_HBM2 BIT(MEM_HBM2) -- cgit v1.2.3 From 1614b2b11fab29dd4ff31ebba9d266961f5af69e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 29 Nov 2021 14:28:41 +0000 Subject: arch: Make ARCH_STACKWALK independent of STACKTRACE Make arch_stack_walk() available for ARCH_STACKWALK architectures without it being entangled in STACKTRACE. Link: https://lore.kernel.org/lkml/20211022152104.356586621@infradead.org/ Signed-off-by: Peter Zijlstra (Intel) [Mark: rebase, drop unnecessary arm change] Signed-off-by: Mark Rutland Cc: Albert Ou Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Dave Hansen Cc: Heiko Carstens Cc: Ingo Molnar Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Thomas Gleixner Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20211129142849.3056714-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- include/linux/stacktrace.h | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index bef158815e83..97455880ac41 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -8,22 +8,6 @@ struct task_struct; struct pt_regs; -#ifdef CONFIG_STACKTRACE -void stack_trace_print(const unsigned long *trace, unsigned int nr_entries, - int spaces); -int stack_trace_snprint(char *buf, size_t size, const unsigned long *entries, - unsigned int nr_entries, int spaces); -unsigned int stack_trace_save(unsigned long *store, unsigned int size, - unsigned int skipnr); -unsigned int stack_trace_save_tsk(struct task_struct *task, - unsigned long *store, unsigned int size, - unsigned int skipnr); -unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, - unsigned int size, unsigned int skipnr); -unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); -unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); - -/* Internal interfaces. Do not use in generic code */ #ifdef CONFIG_ARCH_STACKWALK /** @@ -76,8 +60,25 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie, void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, const struct pt_regs *regs); +#endif /* CONFIG_ARCH_STACKWALK */ -#else /* CONFIG_ARCH_STACKWALK */ +#ifdef CONFIG_STACKTRACE +void stack_trace_print(const unsigned long *trace, unsigned int nr_entries, + int spaces); +int stack_trace_snprint(char *buf, size_t size, const unsigned long *entries, + unsigned int nr_entries, int spaces); +unsigned int stack_trace_save(unsigned long *store, unsigned int size, + unsigned int skipnr); +unsigned int stack_trace_save_tsk(struct task_struct *task, + unsigned long *store, unsigned int size, + unsigned int skipnr); +unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, + unsigned int size, unsigned int skipnr); +unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); +unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); + +#ifndef CONFIG_ARCH_STACKWALK +/* Internal interfaces. Do not use in generic code */ struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; -- cgit v1.2.3 From 9ba74e6c9e9d0c5c1e5792a7111fc7d1a0589cb8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 23:44:21 -0800 Subject: net: add networking namespace refcount tracker We have 100+ syzbot reports about netns being dismantled too soon, still unresolved as of today. We think a missing get_net() or an extra put_net() is the root cause. In order to find the bug(s), and be able to spot future ones, this patch adds CONFIG_NET_NS_REFCNT_TRACKER and new helpers to precisely pair all put_net() with corresponding get_net(). To use these helpers, each data structure owning a refcount should also use a "netns_tracker" to pair the get and put. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 9 +-------- include/net/net_namespace.h | 34 ++++++++++++++++++++++++++++++++++ include/net/net_trackers.h | 18 ++++++++++++++++++ 3 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 include/net/net_trackers.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1a748ee9a421..235d5d082f1a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -48,7 +48,7 @@ #include #include #include -#include +#include struct netpoll_info; struct device; @@ -300,13 +300,6 @@ enum netdev_state_t { __LINK_STATE_TESTING, }; - -#ifdef CONFIG_NET_DEV_REFCNT_TRACKER -typedef struct ref_tracker *netdevice_tracker; -#else -typedef struct {} netdevice_tracker; -#endif - struct gro_list { struct list_head list; int count; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bb5fa5914032..5b61c462e534 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +88,7 @@ struct net { struct idr netns_ids; struct ns_common ns; + struct ref_tracker_dir refcnt_tracker; struct list_head dev_base_head; struct proc_dir_entry *proc_net; @@ -240,6 +242,7 @@ void ipx_unregister_sysctl(void); #ifdef CONFIG_NET_NS void __put_net(struct net *net); +/* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { refcount_inc(&net->ns.count); @@ -258,6 +261,7 @@ static inline struct net *maybe_get_net(struct net *net) return net; } +/* Try using put_net_track() instead */ static inline void put_net(struct net *net) { if (refcount_dec_and_test(&net->ns.count)) @@ -308,6 +312,36 @@ static inline int check_net(const struct net *net) #endif +static inline void netns_tracker_alloc(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp); +#endif +} + +static inline void netns_tracker_free(struct net *net, + netns_tracker *tracker) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_free(&net->refcnt_tracker, tracker); +#endif +} + +static inline struct net *get_net_track(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ + get_net(net); + netns_tracker_alloc(net, tracker, gfp); + return net; +} + +static inline void put_net_track(struct net *net, netns_tracker *tracker) +{ + netns_tracker_free(net, tracker); + put_net(net); +} + typedef struct { #ifdef CONFIG_NET_NS struct net *net; diff --git a/include/net/net_trackers.h b/include/net/net_trackers.h new file mode 100644 index 000000000000..d94c76cf15a9 --- /dev/null +++ b/include/net/net_trackers.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_NET_TRACKERS_H +#define __NET_NET_TRACKERS_H +#include + +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER +typedef struct ref_tracker *netdevice_tracker; +#else +typedef struct {} netdevice_tracker; +#endif + +#ifdef CONFIG_NET_NS_REFCNT_TRACKER +typedef struct ref_tracker *netns_tracker; +#else +typedef struct {} netns_tracker; +#endif + +#endif /* __NET_NET_TRACKERS_H */ -- cgit v1.2.3 From ffa84b5ffb37a957d6062385112ab1069f760de6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 23:44:22 -0800 Subject: net: add netns refcount tracker to struct sock Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ae61cd0b650d..5d8532f26208 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -350,6 +350,7 @@ struct bpf_local_storage; * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags + * @ns_tracker: tracker for netns reference */ struct sock { /* @@ -538,6 +539,7 @@ struct sock { struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct rcu_head sk_rcu; + netns_tracker ns_tracker; }; enum sk_pacing { -- cgit v1.2.3 From 04a931e58d1944ab3d1e11fdfde1947fbe5b6a37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 23:44:23 -0800 Subject: net: add netns refcount tracker to struct seq_net_private Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/seq_file_net.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h index 0fdbe1ddd8d1..b97912fdbae7 100644 --- a/include/linux/seq_file_net.h +++ b/include/linux/seq_file_net.h @@ -9,7 +9,8 @@ extern struct net init_net; struct seq_net_private { #ifdef CONFIG_NET_NS - struct net *net; + struct net *net; + netns_tracker ns_tracker; #endif }; -- cgit v1.2.3 From dbdcda634ce384938805549bd1b3f3eaed50af5e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 23:44:24 -0800 Subject: net: sched: add netns refcount tracker to struct tcf_exts Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/pkt_cls.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 193f88ebf629..cebc1bd713b6 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -202,7 +202,8 @@ struct tcf_exts { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ int nr_actions; struct tc_action **actions; - struct net *net; + struct net *net; + netns_tracker ns_tracker; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -218,6 +219,7 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net, exts->type = 0; exts->nr_actions = 0; exts->net = net; + netns_tracker_alloc(net, &exts->ns_tracker, GFP_KERNEL); exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) @@ -236,6 +238,8 @@ static inline bool tcf_exts_get_net(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT exts->net = maybe_get_net(exts->net); + if (exts->net) + netns_tracker_alloc(exts->net, &exts->ns_tracker, GFP_KERNEL); return exts->net != NULL; #else return true; @@ -246,7 +250,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT if (exts->net) - put_net(exts->net); + put_net_track(exts->net, &exts->ns_tracker); #endif } -- cgit v1.2.3 From e1b539bd73a76dc8a7bf82befe6eac4ae79c76b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 07:44:51 -0800 Subject: xfrm: add net device refcount tracker to struct xfrm_state_offload Signed-off-by: Eric Dumazet Acked-by: Steffen Klassert Link: https://lore.kernel.org/r/20211209154451.4184050-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/xfrm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2308210793a0..83b46da8873d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -128,6 +128,7 @@ struct xfrm_state_walk { struct xfrm_state_offload { struct net_device *dev; + netdevice_tracker dev_tracker; struct net_device *real_dev; unsigned long offload_handle; unsigned int num_exthdrs; @@ -1913,7 +1914,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) if (dev->xfrmdev_ops->xdo_dev_state_free) dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; - dev_put(dev); + dev_put_track(dev, &xso->dev_tracker); } } #else -- cgit v1.2.3 From 65c7cdedeb3026fabcc967a7aae2f755ad4d0783 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Sep 2021 11:24:17 -0400 Subject: genirq: Provide new interfaces for affinity hints The discussion about removing the side effect of irq_set_affinity_hint() of actually applying the cpumask (if not NULL) as affinity to the interrupt, unearthed a few unpleasantries: 1) The modular perf drivers rely on the current behaviour for the very wrong reasons. 2) While none of the other drivers prevents user space from changing the affinity, a cursorily inspection shows that there are at least expectations in some drivers. #1 needs to be cleaned up anyway, so that's not a problem #2 might result in subtle regressions especially when irqbalanced (which nowadays ignores the affinity hint) is disabled. Provide new interfaces: irq_update_affinity_hint() - Only sets the affinity hint pointer irq_set_affinity_and_hint() - Set the pointer and apply the affinity to the interrupt Make irq_set_affinity_hint() a wrapper around irq_apply_affinity_hint() and document it to be phased out. Signed-off-by: Thomas Gleixner Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210501021832.743094-1-jesse.brandeburg@intel.com Link: https://lore.kernel.org/r/20210903152430.244937-2-nitesh@redhat.com --- include/linux/interrupt.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 1f22a30c0963..9367f1cb2e3c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -329,7 +329,46 @@ extern int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); -extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); +extern int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, + bool setaffinity); + +/** + * irq_update_affinity_hint - Update the affinity hint + * @irq: Interrupt to update + * @m: cpumask pointer (NULL to clear the hint) + * + * Updates the affinity hint, but does not change the affinity of the interrupt. + */ +static inline int +irq_update_affinity_hint(unsigned int irq, const struct cpumask *m) +{ + return __irq_apply_affinity_hint(irq, m, false); +} + +/** + * irq_set_affinity_and_hint - Update the affinity hint and apply the provided + * cpumask to the interrupt + * @irq: Interrupt to update + * @m: cpumask pointer (NULL to clear the hint) + * + * Updates the affinity hint and if @m is not NULL it applies it as the + * affinity of that interrupt. + */ +static inline int +irq_set_affinity_and_hint(unsigned int irq, const struct cpumask *m) +{ + return __irq_apply_affinity_hint(irq, m, true); +} + +/* + * Deprecated. Use irq_update_affinity_hint() or irq_set_affinity_and_hint() + * instead. + */ +static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) +{ + return irq_set_affinity_and_hint(irq, m); +} + extern int irq_update_affinity_desc(unsigned int irq, struct irq_affinity_desc *affinity); @@ -361,6 +400,18 @@ static inline int irq_can_set_affinity(unsigned int irq) static inline int irq_select_affinity(unsigned int irq) { return 0; } +static inline int irq_update_affinity_hint(unsigned int irq, + const struct cpumask *m) +{ + return -EINVAL; +} + +static inline int irq_set_affinity_and_hint(unsigned int irq, + const struct cpumask *m) +{ + return -EINVAL; +} + static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) { -- cgit v1.2.3 From bd984c03097b8e9b7500cba7378040ac1c697dbb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 19 Nov 2021 11:20:33 -0800 Subject: f2fs: show more DIO information in tracepoint This prints more information of DIO in tracepoint. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index dcb94d740e12..f701bb23f83c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -936,14 +936,14 @@ TRACE_EVENT(f2fs_fallocate, TRACE_EVENT(f2fs_direct_IO_enter, - TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw), + TP_PROTO(struct inode *inode, struct kiocb *iocb, long len, int rw), - TP_ARGS(inode, offset, len, rw), + TP_ARGS(inode, iocb, len, rw), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(loff_t, pos) + __field(struct kiocb *, iocb) __field(unsigned long, len) __field(int, rw) ), @@ -951,15 +951,18 @@ TRACE_EVENT(f2fs_direct_IO_enter, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->pos = offset; + __entry->iocb = iocb; __entry->len = len; __entry->rw = rw; ), - TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d", + TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_hint = %x ki_ioprio = %x rw = %d", show_dev_ino(__entry), - __entry->pos, + __entry->iocb->ki_pos, __entry->len, + __entry->iocb->ki_flags, + __entry->iocb->ki_hint, + __entry->iocb->ki_ioprio, __entry->rw) ); -- cgit v1.2.3 From 9dcc38e2813e0cd3b195940c98b181ce6ede8f20 Mon Sep 17 00:00:00 2001 From: Drew DeVault Date: Fri, 10 Dec 2021 14:46:09 -0800 Subject: Increase default MLOCK_LIMIT to 8 MiB This limit has not been updated since 2008, when it was increased to 64 KiB at the request of GnuPG. Until recently, the main use-cases for this feature were (1) preventing sensitive memory from being swapped, as in GnuPG's use-case; and (2) real-time use-cases. In the first case, little memory is called for, and in the second case, the user is generally in a position to increase it if they need more. The introduction of IOURING_REGISTER_BUFFERS adds a third use-case: preparing fixed buffers for high-performance I/O. This use-case will take as much of this memory as it can get, but is still limited to 64 KiB by default, which is very little. This increases the limit to 8 MB, which was chosen fairly arbitrarily as a more generous, but still conservative, default value. It is also possible to raise this limit in userspace. This is easily done, for example, in the use-case of a network daemon: systemd, for instance, provides for this via LimitMEMLOCK in the service file; OpenRC via the rc_ulimit variables. However, there is no established userspace facility for configuring this outside of daemons: end-user applications do not presently have access to a convenient means of raising their limits. The buck, as it were, stops with the kernel. It's much easier to address it here than it is to bring it to hundreds of distributions, and it can only realistically be relied upon to be high-enough by end-user software if it is more-or-less ubiquitous. Most distros don't change this particular rlimit from the kernel-supplied default value, so a change here will easily provide that ubiquity. Link: https://lkml.kernel.org/r/20211028080813.15966-1-sir@cmpwn.com Signed-off-by: Drew DeVault Acked-by: Jens Axboe Acked-by: Cyril Hrubis Acked-by: Johannes Weiner Cc: Pavel Begunkov Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Andrew Dona-Couch Cc: Ammar Faizi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/resource.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/resource.h b/include/uapi/linux/resource.h index 74ef57b38f9f..ac5d6a3031db 100644 --- a/include/uapi/linux/resource.h +++ b/include/uapi/linux/resource.h @@ -66,10 +66,17 @@ struct rlimit64 { #define _STK_LIM (8*1024*1024) /* - * GPG2 wants 64kB of mlocked memory, to make sure pass phrases - * and other sensitive information are never written to disk. + * Limit the amount of locked memory by some sane default: + * root can always increase this limit if needed. + * + * The main use-cases are (1) preventing sensitive memory + * from being swapped; (2) real-time operations; (3) via + * IOURING_REGISTER_BUFFERS. + * + * The first two don't need much. The latter will take as + * much as it can get. 8MB is a reasonably sane default. */ -#define MLOCK_LIMIT ((PAGE_SIZE > 64*1024) ? PAGE_SIZE : 64*1024) +#define MLOCK_LIMIT (8*1024*1024) /* * Due to binary compatibility, the actual resource numbers -- cgit v1.2.3 From e4779015fd5d2fb8390c258268addff24d6077c7 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:22 -0800 Subject: timers: implement usleep_idle_range() Patch series "mm/damon: Fix fake /proc/loadavg reports", v3. This patchset fixes DAMON's fake load report issue. The first patch makes yet another variant of usleep_range() for this fix, and the second patch fixes the issue of DAMON by making it using the newly introduced function. This patch (of 2): Some kernel threads such as DAMON could need to repeatedly sleep in micro seconds level. Because usleep_range() sleeps in uninterruptible state, however, such threads would make /proc/loadavg reports fake load. To help such cases, this commit implements a variant of usleep_range() called usleep_idle_range(). It is same to usleep_range() but sets the state of the current task as TASK_IDLE while sleeping. Link: https://lkml.kernel.org/r/20211126145015.15862-1-sj@kernel.org Link: https://lkml.kernel.org/r/20211126145015.15862-2-sj@kernel.org Signed-off-by: SeongJae Park Suggested-by: Andrew Morton Reviewed-by: Thomas Gleixner Tested-by: Oleksandr Natalenko Cc: John Stultz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delay.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/delay.h b/include/linux/delay.h index 8eacf67eb212..039e7e0c7378 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -20,6 +20,7 @@ */ #include +#include extern unsigned long loops_per_jiffy; @@ -58,7 +59,18 @@ void calibrate_delay(void); void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -void usleep_range(unsigned long min, unsigned long max); +void usleep_range_state(unsigned long min, unsigned long max, + unsigned int state); + +static inline void usleep_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); +} + +static inline void usleep_idle_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_IDLE); +} static inline void ssleep(unsigned int seconds) { -- cgit v1.2.3 From 33d60fbd21fa6f71a88571209e301ec6de59f81b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 8 Dec 2021 15:21:58 +0900 Subject: sock: Use sock_owned_by_user_nocheck() instead of sk_lock.owned. This patch moves sock_release_ownership() down in include/net/sock.h and replaces some sk_lock.owned tests with sock_owned_by_user_nocheck(). Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20211208062158.54132-1-kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski --- include/net/sock.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 5d8532f26208..37f878564d25 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1635,16 +1635,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK); } -static inline void sock_release_ownership(struct sock *sk) -{ - if (sk->sk_lock.owned) { - sk->sk_lock.owned = 0; - - /* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, _RET_IP_); - } -} - /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. @@ -1771,12 +1761,23 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk) return sk->sk_lock.owned; } +static inline void sock_release_ownership(struct sock *sk) +{ + if (sock_owned_by_user_nocheck(sk)) { + sk->sk_lock.owned = 0; + + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); + } +} + /* no reclassification while locks are held */ static inline bool sock_allow_reclassification(const struct sock *csk) { struct sock *sk = (struct sock *)csk; - return !sk->sk_lock.owned && !spin_is_locked(&sk->sk_lock.slock); + return !sock_owned_by_user_nocheck(sk) && + !spin_is_locked(&sk->sk_lock.slock); } struct sock *sk_alloc(struct net *net, int family, gfp_t priority, -- cgit v1.2.3 From 840ece19e9f246a1b15308ae76b68aaf7a3a9433 Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Wed, 8 Dec 2021 23:40:10 -0800 Subject: net: ocelot: fix missed include in the vsc7514_regs.h file commit 32ecd22ba60b ("net: mscc: ocelot: split register definitions to a separate file") left out an include for . It was missed because the only consumer was ocelot_vsc7514.h, which already included ocelot_vcap. Fixes: 32ecd22ba60b ("net: mscc: ocelot: split register definitions to a separate file") Signed-off-by: Colin Foster Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20211209074010.1813010-1-colin.foster@in-advantage.com Signed-off-by: Jakub Kicinski --- include/soc/mscc/vsc7514_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/vsc7514_regs.h b/include/soc/mscc/vsc7514_regs.h index 98743e252012..ceee26c96959 100644 --- a/include/soc/mscc/vsc7514_regs.h +++ b/include/soc/mscc/vsc7514_regs.h @@ -8,6 +8,8 @@ #ifndef VSC7514_REGS_H #define VSC7514_REGS_H +#include + extern const u32 vsc7514_ana_regmap[]; extern const u32 vsc7514_qs_regmap[]; extern const u32 vsc7514_qsys_regmap[]; -- cgit v1.2.3 From e5150f00721f6f8e7b4e7f31bff86b4b6a8de0d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Thu, 9 Dec 2021 16:49:08 +0100 Subject: net: ocelot: export ocelot_ifh_port_set() to setup IFH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FDMA will need this code to prepare the injection frame header when sending SKBs. Move this code into ocelot_ifh_port_set() and add conditional IFH setting for vlan and rew op if they are not set. Reviewed-by: Vladimir Oltean Signed-off-by: Clément Léger Signed-off-by: Jakub Kicinski --- include/soc/mscc/ocelot.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 33f2e8c9e88b..9b99cfd39a59 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -794,6 +794,7 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); +void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag); int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); -- cgit v1.2.3 From b471a71e525c73608a6ae5a3fdd2a5d1224da6a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Thu, 9 Dec 2021 16:49:09 +0100 Subject: net: ocelot: add and export ocelot_ptp_rx_timestamp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to support PTP in FDMA, PTP handling code is needed. Since this is the same as for register-based extraction, export it with a new ocelot_ptp_rx_timestamp() function. Reviewed-by: Vladimir Oltean Signed-off-by: Clément Léger Signed-off-by: Jakub Kicinski --- include/soc/mscc/ocelot.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 9b99cfd39a59..f038062a97a9 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -797,6 +797,8 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag); int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); +void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, + u64 timestamp); /* Hardware initialization */ int ocelot_regfields_init(struct ocelot *ocelot, -- cgit v1.2.3 From 753a026cfec1429c9e32e004ae4d4c2727cc0111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Thu, 9 Dec 2021 16:49:11 +0100 Subject: net: ocelot: add FDMA support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ethernet frames can be extracted or injected autonomously to or from the device’s DDR3/DDR3L memory and/or PCIe memory space. Linked list data structures in memory are used for injecting or extracting Ethernet frames. The FDMA generates interrupts when frame extraction or injection is done and when the linked lists need updating. The FDMA is shared between all the ethernet ports of the switch and uses a linked list of descriptors (DCB) to inject and extract packets. Before adding descriptors, the FDMA channels must be stopped. It would be inefficient to do that each time a descriptor would be added so the channels are restarted only once they stopped. Both channels uses ring-like structure to feed the DCBs to the FDMA. head and tail are never touched by hardware and are completely handled by the driver. On top of that, page recycling has been added and is mostly taken from gianfar driver. Reviewed-by: Vladimir Oltean Co-developed-by: Alexandre Belloni Signed-off-by: Alexandre Belloni Signed-off-by: Clément Léger Signed-off-by: Jakub Kicinski --- include/soc/mscc/ocelot.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index f038062a97a9..3e9454b00562 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -118,6 +118,7 @@ enum ocelot_target { S2, HSIO, PTP, + FDMA, GCB, DEV_GMII, TARGET_MAX, @@ -732,6 +733,8 @@ struct ocelot { /* Protects the PTP clock */ spinlock_t ptp_clock_lock; struct ptp_pin_desc ptp_pins[OCELOT_PTP_PINS_NUM]; + + struct ocelot_fdma *fdma; }; struct ocelot_policer { -- cgit v1.2.3 From bff8c3848e071d387d8b0784dc91fa49cd563774 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Nov 2021 11:01:03 +0100 Subject: bitfield.h: Fix "type of reg too small for mask" test The test: 'mask > (typeof(_reg))~0ull' only works correctly when both sides are unsigned, consider: - 0xff000000 vs (int)~0ull - 0x000000ff vs (int)~0ull Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211110101324.950210584@infradead.org --- include/linux/bitfield.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 4e035aca6f7e..6093fa6db260 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -41,6 +41,22 @@ #define __bf_shf(x) (__builtin_ffsll(x) - 1) +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ ({ \ BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ @@ -49,7 +65,8 @@ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ _pfx "value too large for the field"); \ - BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ _pfx "type of reg too small for mask"); \ __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ (1ULL << __bf_shf(_mask))); \ -- cgit v1.2.3 From 4121485d271bd730537f613ce041e7ea659606a7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 9 Dec 2021 21:52:31 +0200 Subject: PCI: Sort Intel Device IDs by value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sort Intel Device IDs by value. [bhelgaas: lower-case Intel section since we're touching it anyway] Link: https://lore.kernel.org/r/20211209195231.2785-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- include/linux/pci_ids.h | 50 ++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 011f2f1ea5bb..0d26ab7eb7dc 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2635,8 +2635,8 @@ #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -#define PCI_DEVICE_ID_INTEL_PXH_1 0x032A -#define PCI_DEVICE_ID_INTEL_PXHV 0x032C +#define PCI_DEVICE_ID_INTEL_PXH_1 0x032a +#define PCI_DEVICE_ID_INTEL_PXHV 0x032c #define PCI_DEVICE_ID_INTEL_80332_0 0x0330 #define PCI_DEVICE_ID_INTEL_80332_1 0x0332 #define PCI_DEVICE_ID_INTEL_80333_0 0x0370 @@ -2654,14 +2654,14 @@ #define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822 #define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823 #define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824 -#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F -#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095E +#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084f +#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095e #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60 #define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 #define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085 -#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108F +#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108f #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 @@ -2755,12 +2755,6 @@ #define PCI_DEVICE_ID_INTEL_82801EB_11 0x24db #define PCI_DEVICE_ID_INTEL_82801EB_12 0x24dc #define PCI_DEVICE_ID_INTEL_82801EB_13 0x24dd -#define PCI_DEVICE_ID_INTEL_ESB_1 0x25a1 -#define PCI_DEVICE_ID_INTEL_ESB_2 0x25a2 -#define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4 -#define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6 -#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab -#define PCI_DEVICE_ID_INTEL_ESB_10 0x25ac #define PCI_DEVICE_ID_INTEL_82820_HB 0x2500 #define PCI_DEVICE_ID_INTEL_82820_UP_HB 0x2501 #define PCI_DEVICE_ID_INTEL_82850_HB 0x2530 @@ -2775,14 +2769,15 @@ #define PCI_DEVICE_ID_INTEL_82915G_IG 0x2582 #define PCI_DEVICE_ID_INTEL_82915GM_HB 0x2590 #define PCI_DEVICE_ID_INTEL_82915GM_IG 0x2592 -#define PCI_DEVICE_ID_INTEL_5000_ERR 0x25F0 -#define PCI_DEVICE_ID_INTEL_5000_FBD0 0x25F5 -#define PCI_DEVICE_ID_INTEL_5000_FBD1 0x25F6 -#define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770 -#define PCI_DEVICE_ID_INTEL_82945G_IG 0x2772 -#define PCI_DEVICE_ID_INTEL_3000_HB 0x2778 -#define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27A0 -#define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27A2 +#define PCI_DEVICE_ID_INTEL_ESB_1 0x25a1 +#define PCI_DEVICE_ID_INTEL_ESB_2 0x25a2 +#define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4 +#define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6 +#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab +#define PCI_DEVICE_ID_INTEL_ESB_10 0x25ac +#define PCI_DEVICE_ID_INTEL_5000_ERR 0x25f0 +#define PCI_DEVICE_ID_INTEL_5000_FBD0 0x25f5 +#define PCI_DEVICE_ID_INTEL_5000_FBD1 0x25f6 #define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640 #define PCI_DEVICE_ID_INTEL_ICH6_1 0x2641 #define PCI_DEVICE_ID_INTEL_ICH6_2 0x2642 @@ -2794,6 +2789,11 @@ #define PCI_DEVICE_ID_INTEL_ESB2_14 0x2698 #define PCI_DEVICE_ID_INTEL_ESB2_17 0x269b #define PCI_DEVICE_ID_INTEL_ESB2_18 0x269e +#define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770 +#define PCI_DEVICE_ID_INTEL_82945G_IG 0x2772 +#define PCI_DEVICE_ID_INTEL_3000_HB 0x2778 +#define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27a0 +#define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27a2 #define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8 #define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9 #define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0 @@ -2846,7 +2846,7 @@ #define PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_PHY0 0x2c91 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR 0x2c98 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD 0x2c99 -#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST 0x2c9C +#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST 0x2c9c #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL 0x2ca0 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR 0x2ca1 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK 0x2ca2 @@ -2958,16 +2958,16 @@ #define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */ #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f -#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 -#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 -#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 -#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 #define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035 #define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036 -#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff #define PCI_DEVICE_ID_INTEL_EP80579_0 0x5031 #define PCI_DEVICE_ID_INTEL_EP80579_1 0x5032 +#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 +#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 +#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 +#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 +#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 #define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020 -- cgit v1.2.3 From 0f09c274698590d508c43f924d9dffc7130b782d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Dec 2021 09:07:56 +0100 Subject: futex: Fix additional regressions Naresh reported another architecture that was broken by the same typo that was already fixed for three architectures: mips also refers to the futex_atomic_op_inuser_local() function by the wrong name and runs into a missing closing '}' as well. Going through the source tree the same typo was found in the documentation as well as in the xtensa code, both of which ended up escaping the regression testing so far. In the case of xtensa, it appears that the broken code path is only used when building for platforms that are not supported by the default gcc configuration, so they are impossible to test for with default setups. After going through these more carefully and fixing up the typos, all architectures have been build-tested again to ensure that this is now complete. Fixes: 4e0d84634445 ("futex: Fix sparc32/m68k/nds32 build regression") Fixes: 3f2bedabb62c ("futex: Ensure futex_atomic_cmpxchg_inatomic() is present") Reported-by: Linux Kernel Functional Testing Reported-by: Naresh Kamboju Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20211203080823.2938839-1-arnd@kernel.org --- include/asm-generic/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index 66d6843bfd02..2a19215baae5 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -21,7 +21,7 @@ #endif /** - * arch_futex_atomic_op_inuser_local() - Atomic arithmetic operation with constant + * futex_atomic_op_inuser_local() - Atomic arithmetic operation with constant * argument and comparison of the previous * futex value with another constant. * -- cgit v1.2.3 From c5fb19937455095573a19ddcbff32e993ed10e35 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 10 Dec 2021 22:16:49 +0800 Subject: bpf: Add bpf_strncmp helper The helper compares two strings: one string is a null-terminated read-only string, and another string has const max storage size but doesn't need to be null-terminated. It can be used to compare file name in tracing or LSM program. Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211210141652.877186-2-houtao1@huawei.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0ceb54c6342f..7a40022e3d00 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2163,6 +2163,7 @@ extern const struct bpf_func_proto bpf_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; +extern const struct bpf_func_proto bpf_strncmp_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c26871263f1f..2820c77e4846 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4983,6 +4983,16 @@ union bpf_attr { * Return * The number of loops performed, **-EINVAL** for invalid **flags**, * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. + * + * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) + * Description + * Do strncmp() between **s1** and **s2**. **s1** doesn't need + * to be null-terminated and **s1_sz** is the maximum storage + * size of **s1**. **s2** must be a read-only string. + * Return + * An integer less than, equal to, or greater than zero + * if the first **s1_sz** bytes of **s1** is found to be + * less than, to match, or be greater than **s2**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5167,6 +5177,7 @@ union bpf_attr { FN(kallsyms_lookup_name), \ FN(find_vma), \ FN(loop), \ + FN(strncmp), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From dc452a471dbae8aca8257c565174212620880093 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:37 +0200 Subject: net: dsa: introduce tagger-owned storage for private and shared data Ansuel is working on register access over Ethernet for the qca8k switch family. This requires the qca8k tagging protocol driver to receive frames which aren't intended for the network stack, but instead for the qca8k switch driver itself. The dp->priv is currently the prevailing method for passing data back and forth between the tagging protocol driver and the switch driver. However, this method is riddled with caveats. The DSA design allows in principle for any switch driver to return any protocol it desires in ->get_tag_protocol(). The dsa_loop driver can be modified to do just that. But in the current design, the memory behind dp->priv has to be allocated by the switch driver, so if the tagging protocol is paired to an unexpected switch driver, we may end up in NULL pointer dereferences inside the kernel, or worse (a switch driver may allocate dp->priv according to the expectations of a different tagger). The latter possibility is even more plausible considering that DSA switches can dynamically change tagging protocols in certain cases (dsa <-> edsa, ocelot <-> ocelot-8021q), and the current design lends itself to mistakes that are all too easy to make. This patch proposes that the tagging protocol driver should manage its own memory, instead of relying on the switch driver to do so. After analyzing the different in-tree needs, it can be observed that the required tagger storage is per switch, therefore a ds->tagger_data pointer is introduced. In principle, per-port storage could also be introduced, although there is no need for it at the moment. Future changes will replace the current usage of dp->priv with ds->tagger_data. We define a "binding" event between the DSA switch tree and the tagging protocol. During this binding event, the tagging protocol's ->connect() method is called first, and this may allocate some memory for each switch of the tree. Then a cross-chip notifier is emitted for the switches within that tree, and they are given the opportunity to fix up the tagger's memory (for example, they might set up some function pointers that represent virtual methods for consuming packets). Because the memory is owned by the tagger, there exists a ->disconnect() method for the tagger (which is the place to free the resources), but there doesn't exist a ->disconnect() method for the switch driver. This is part of the design. The switch driver should make minimal use of the public part of the tagger data, and only after type-checking it using the supplied "proto" argument. In the code there are in fact two binding events, one is the initial event in dsa_switch_setup_tag_protocol(). At this stage, the cross chip notifier chains aren't initialized, so we call each switch's connect() method by hand. Then there is dsa_tree_bind_tag_proto() during dsa_tree_change_tag_proto(), and here we have an old protocol and a new one. We first connect to the new one before disconnecting from the old one, to simplify error handling a bit and to ensure we remain in a valid state at all times. Co-developed-by: Ansuel Smith Signed-off-by: Ansuel Smith Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index bdf308a5c55e..8b496c7e62ef 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -82,12 +82,15 @@ enum dsa_tag_protocol { }; struct dsa_switch; +struct dsa_switch_tree; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); + int (*connect)(struct dsa_switch_tree *dst); + void (*disconnect)(struct dsa_switch_tree *dst); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; @@ -337,6 +340,8 @@ struct dsa_switch { */ void *priv; + void *tagger_data; + /* * Configuration data for this switch. */ @@ -689,6 +694,13 @@ struct dsa_switch_ops { enum dsa_tag_protocol mprot); int (*change_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol proto); + /* + * Method for switch drivers to connect to the tagging protocol driver + * in current use. The switch driver can provide handlers for certain + * types of packets for switch management. + */ + int (*connect_tag_protocol)(struct dsa_switch *ds, + enum dsa_tag_protocol proto); /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); -- cgit v1.2.3 From 35d976802124303a5b3eb7ec3ed188d568204373 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:38 +0200 Subject: net: dsa: tag_ocelot: convert to tagger-owned data The felix driver makes very light use of dp->priv, and the tagger is effectively stateless. dp->priv is practically only needed to set up a callback to perform deferred xmit of PTP and STP packets using the ocelot-8021q tagging protocol (the main ocelot tagging protocol makes no use of dp->priv, although this driver sets up dp->priv irrespective of actual tagging protocol in use). struct felix_port (what used to be pointed to by dp->priv) is removed and replaced with a two-sided structure. The public side of this structure, visible to the switch driver, is ocelot_8021q_tagger_data. The private side is ocelot_8021q_tagger_private, and the latter structure physically encapsulates the former. The public half of the tagger data structure can be accessed through a helper of the same name (ocelot_8021q_tagger_data) which also sanity-checks the protocol currently in use by the switch. The public/private split was requested by Andrew Lunn. Suggested-by: Andrew Lunn Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/ocelot.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 7ee708ad7df2..dca2969015d8 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -8,6 +8,7 @@ #include #include #include +#include struct ocelot_skb_cb { struct sk_buff *clone; @@ -168,11 +169,18 @@ struct felix_deferred_xmit_work { struct kthread_work work; }; -struct felix_port { +struct ocelot_8021q_tagger_data { void (*xmit_work_fn)(struct kthread_work *work); - struct kthread_worker *xmit_worker; }; +static inline struct ocelot_8021q_tagger_data * +ocelot_8021q_tagger_data(struct dsa_switch *ds) +{ + BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_OCELOT_8021Q); + + return ds->tagger_data; +} + static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val) { packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0); -- cgit v1.2.3 From d38049bbe7601f38d598f5da5ff09980483b290a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:40 +0200 Subject: net: dsa: sja1105: bring deferred xmit implementation in line with ocelot-8021q When the ocelot-8021q driver was converted to deferred xmit as part of commit 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during init and teardown"), the deferred implementation was deliberately made subtly different from what sja1105 has. The implementation differences lied on the following observations: - There might be a race between these two lines in tag_sja1105.c: skb_queue_tail(&sp->xmit_queue, skb_get(skb)); kthread_queue_work(sp->xmit_worker, &sp->xmit_work); and the skb dequeue logic in sja1105_port_deferred_xmit(). For example, the xmit_work might be already queued, however the work item has just finished walking through the skb queue. Because we don't check the return code from kthread_queue_work, we don't do anything if the work item is already queued. However, nobody will take that skb and send it, at least until the next timestampable skb is sent. This creates additional (and avoidable) TX timestamping latency. To close that race, what the ocelot-8021q driver does is it doesn't keep a single work item per port, and a skb timestamping queue, but rather dynamically allocates a work item per packet. - It is also unnecessary to have more than one kthread that does the work. So delete the per-port kthread allocations and replace them with a single kthread which is global to the switch. This change brings the two implementations in line by applying those observations to the sja1105 driver as well. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index e6c78be40bde..acd9d2afccab 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -37,6 +37,12 @@ #define SJA1105_HWTS_RX_EN 0 +struct sja1105_deferred_xmit_work { + struct dsa_port *dp; + struct sk_buff *skb; + struct kthread_work work; +}; + /* Global tagger data: each struct sja1105_port has a reference to * the structure defined in struct sja1105_private. */ @@ -52,6 +58,8 @@ struct sja1105_tagger_data { * 2-step TX timestamps */ struct sk_buff_head skb_txtstamp_queue; + struct kthread_worker *xmit_worker; + void (*xmit_work_fn)(struct kthread_work *work); }; struct sja1105_skb_cb { @@ -65,9 +73,6 @@ struct sja1105_skb_cb { ((struct sja1105_skb_cb *)((skb)->cb)) struct sja1105_port { - struct kthread_worker *xmit_worker; - struct kthread_work xmit_work; - struct sk_buff_head xmit_queue; struct sja1105_tagger_data *data; bool hwts_tx_en; }; -- cgit v1.2.3 From 6f6770ab1ce2b56619264ec6be0b62f05564dcf6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:41 +0200 Subject: net: dsa: sja1105: remove hwts_tx_en from tagger data This tagger property is in fact not used at all by the tagger, only by the switch driver. Therefore it makes sense to be moved to sja1105_private. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index acd9d2afccab..32a8a1344cf6 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -74,7 +74,6 @@ struct sja1105_skb_cb { struct sja1105_port { struct sja1105_tagger_data *data; - bool hwts_tx_en; }; /* Timestamps are in units of 8 ns clock ticks (equivalent to -- cgit v1.2.3 From bfcf1425222008e7390c0784b0f3bb7b497fccaa Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:42 +0200 Subject: net: dsa: sja1105: make dp->priv point directly to sja1105_tagger_data The design of the sja1105 tagger dp->priv is that each port has a separate struct sja1105_port, and the sp->data pointer points to a common struct sja1105_tagger_data. We have removed all per-port members accessible by the tagger, and now only struct sja1105_tagger_data remains. Make dp->priv point directly to this. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 32a8a1344cf6..1dda9cce85d9 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -43,9 +43,7 @@ struct sja1105_deferred_xmit_work { struct kthread_work work; }; -/* Global tagger data: each struct sja1105_port has a reference to - * the structure defined in struct sja1105_private. - */ +/* Global tagger data */ struct sja1105_tagger_data { struct sk_buff *stampable_skb; /* Protects concurrent access to the meta state machine @@ -72,10 +70,6 @@ struct sja1105_skb_cb { #define SJA1105_SKB_CB(skb) \ ((struct sja1105_skb_cb *)((skb)->cb)) -struct sja1105_port { - struct sja1105_tagger_data *data; -}; - /* Timestamps are in units of 8 ns clock ticks (equivalent to * a fixed 125 MHz clock). */ -- cgit v1.2.3 From 22ee9f8e4011fb8a6d75dc2f9a43360d4f400235 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:43 +0200 Subject: net: dsa: sja1105: move ts_id from sja1105_tagger_data The TX timestamp ID is incremented by the SJA1110 PTP timestamping callback (->port_tx_timestamp) for every packet, when cloning it. It isn't used by the tagger at all, even though it sits inside the struct sja1105_tagger_data. Also, serialization to this structure is currently done through tagger_data->meta_lock, which is a cheap hack because the meta_lock isn't used for anything else on SJA1110 (sja1105_rcv_meta_state_machine isn't called). This change moves ts_id from sja1105_tagger_data to sja1105_private and introduces a dedicated spinlock for it, also in sja1105_private. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 1dda9cce85d9..d8ee53085c09 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -51,7 +51,6 @@ struct sja1105_tagger_data { */ spinlock_t meta_lock; unsigned long state; - u8 ts_id; /* Used on SJA1110 where meta frames are generated only for * 2-step TX timestamps */ -- cgit v1.2.3 From c79e84866d2ac637fce921a28288f214e91d662b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:44 +0200 Subject: net: dsa: tag_sja1105: convert to tagger-owned data Currently, struct sja1105_tagger_data is a part of struct sja1105_private, and is used by the sja1105 driver to populate dp->priv. With the movement towards tagger-owned storage, the sja1105 driver should not be the owner of this memory. This change implements the connection between the sja1105 switch driver and its tagging protocol, which means that sja1105_tagger_data no longer stays in dp->priv but in ds->tagger_data, and that the sja1105 driver now only populates the sja1105_port_deferred_xmit callback pointer. The kthread worker is now the responsibility of the tagger. The sja1105 driver also alters the tagger's state some more, especially with regard to the PTP RX timestamping state. This will be fixed up a bit in further changes. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index d8ee53085c09..9f7d42cbbc08 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -84,9 +84,12 @@ static inline s64 sja1105_ticks_to_ns(s64 ticks) return ticks * SJA1105_TICK_NS; } -static inline bool dsa_port_is_sja1105(struct dsa_port *dp) +static inline struct sja1105_tagger_data * +sja1105_tagger_data(struct dsa_switch *ds) { - return true; + BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1105); + + return ds->tagger_data; } #endif /* _NET_DSA_SJA1105_H */ -- cgit v1.2.3 From fcbf979a5b4b5784bfb5647ae6190cd5c2ae595d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:45 +0200 Subject: Revert "net: dsa: move sja1110_process_meta_tstamp inside the tagging protocol driver" This reverts commit 6d709cadfde68dbd12bef12fcced6222226dcb06. The above change was done to avoid calling symbols exported by the switch driver from the tagging protocol driver. With the tagger-owned storage model, we have a new option on our hands, and that is for the switch driver to provide a data consumer handler in the form of a function pointer inside the ->connect_tag_protocol() method. Having a function pointer avoids the problems of the exported symbols approach. By creating a handler for metadata frames holding TX timestamps on SJA1110, we are able to eliminate an skb queue from the tagger data, and replace it with a simple, and stateless, function pointer. This skb queue is now handled exclusively by sja1105_ptp.c, which makes the code easier to follow, as it used to be before the reverted patch. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 9f7d42cbbc08..d216211b64f8 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -37,6 +37,11 @@ #define SJA1105_HWTS_RX_EN 0 +enum sja1110_meta_tstamp { + SJA1110_META_TSTAMP_TX = 0, + SJA1110_META_TSTAMP_RX = 1, +}; + struct sja1105_deferred_xmit_work { struct dsa_port *dp; struct sk_buff *skb; @@ -51,12 +56,10 @@ struct sja1105_tagger_data { */ spinlock_t meta_lock; unsigned long state; - /* Used on SJA1110 where meta frames are generated only for - * 2-step TX timestamps - */ - struct sk_buff_head skb_txtstamp_queue; struct kthread_worker *xmit_worker; void (*xmit_work_fn)(struct kthread_work *work); + void (*meta_tstamp_handler)(struct dsa_switch *ds, int port, u8 ts_id, + enum sja1110_meta_tstamp dir, u64 tstamp); }; struct sja1105_skb_cb { @@ -69,21 +72,6 @@ struct sja1105_skb_cb { #define SJA1105_SKB_CB(skb) \ ((struct sja1105_skb_cb *)((skb)->cb)) -/* Timestamps are in units of 8 ns clock ticks (equivalent to - * a fixed 125 MHz clock). - */ -#define SJA1105_TICK_NS 8 - -static inline s64 ns_to_sja1105_ticks(s64 ns) -{ - return ns / SJA1105_TICK_NS; -} - -static inline s64 sja1105_ticks_to_ns(s64 ticks) -{ - return ticks * SJA1105_TICK_NS; -} - static inline struct sja1105_tagger_data * sja1105_tagger_data(struct dsa_switch *ds) { -- cgit v1.2.3 From 950a419d9de13668f86828394cb242a1f9dece74 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:46 +0200 Subject: net: dsa: tag_sja1105: split sja1105_tagger_data into private and public sections The sja1105 driver messes with the tagging protocol's state when PTP RX timestamping is enabled/disabled. This is fundamentally necessary because the tagger needs to know what to do when it receives a PTP packet. If RX timestamping is enabled, then a metadata follow-up frame is expected, and this holds the (partial) timestamp. So the tagger plays hide-and-seek with the network stack until it also gets the metadata frame, and then presents a single packet, the timestamped PTP packet. But when RX timestamping isn't enabled, there is no metadata frame expected, so the hide-and-seek game must be turned off and the packet must be delivered right away to the network stack. Considering this, we create a pseudo isolation by devising two tagger methods callable by the switch: one to get the RX timestamping state, and one to set it. Since we can't export symbols between the tagger and the switch driver, these methods are exposed through function pointers. After this change, the public portion of the sja1105_tagger_data contains only function pointers. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index d216211b64f8..e9cb1ae6d742 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -35,8 +35,6 @@ #define SJA1105_META_SMAC 0x222222222222ull #define SJA1105_META_DMAC 0x0180C200000Eull -#define SJA1105_HWTS_RX_EN 0 - enum sja1110_meta_tstamp { SJA1110_META_TSTAMP_TX = 0, SJA1110_META_TSTAMP_RX = 1, @@ -50,16 +48,13 @@ struct sja1105_deferred_xmit_work { /* Global tagger data */ struct sja1105_tagger_data { - struct sk_buff *stampable_skb; - /* Protects concurrent access to the meta state machine - * from taggers running on multiple ports on SMP systems - */ - spinlock_t meta_lock; - unsigned long state; - struct kthread_worker *xmit_worker; + /* Tagger to switch */ void (*xmit_work_fn)(struct kthread_work *work); void (*meta_tstamp_handler)(struct dsa_switch *ds, int port, u8 ts_id, enum sja1110_meta_tstamp dir, u64 tstamp); + /* Switch to tagger */ + bool (*rxtstamp_get_state)(struct dsa_switch *ds); + void (*rxtstamp_set_state)(struct dsa_switch *ds, bool on); }; struct sja1105_skb_cb { -- cgit v1.2.3 From 4f3cb34364e2552ce5f3a8ca33a184c13c6152a5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 10 Dec 2021 01:34:47 +0200 Subject: net: dsa: remove dp->priv All current in-tree uses of dp->priv have been replaced with ds->tagger_data, which provides for a safer API especially when the connection isn't the regular 1:1 link between one switch driver and one tagging protocol driver, but could be either one switch to many taggers, or many switches to one tagger. Therefore, we can remove this unused pointer. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 8b496c7e62ef..64d71968aa91 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -276,12 +276,6 @@ struct dsa_port { struct list_head list; - /* - * Give the switch driver somewhere to hang its per-port private data - * structures (accessible from the tagger). - */ - void *priv; - /* * Original copy of the master netdev ethtool_ops */ -- cgit v1.2.3 From 3cf3cdea6fe3fdb7a1e4ac1372b80408e4f56b73 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Sun, 5 Dec 2021 13:40:44 +0200 Subject: dt-bindings: iio: add AD74413R The AD74412R and AD74413R are quad-channel, software configurable, input/output solutions for building and process control applications. They contain functionality for analog output, analog input, digital input, resistance temperature detector, and thermocouple measurements integrated into a single chip solution with an SPI interface. The devices feature a 16-bit ADC and four configurable 13-bit DACs to provide four configurable input/output channels and a suite of diagnostic functions. The AD74413R differentiates itself from the AD74412R by being HART-compatible. Signed-off-by: Cosmin Tanislav Reviewed-by: Rob Herring Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20211205114045.173612-3-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- include/dt-bindings/iio/addac/adi,ad74413r.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/dt-bindings/iio/addac/adi,ad74413r.h (limited to 'include') diff --git a/include/dt-bindings/iio/addac/adi,ad74413r.h b/include/dt-bindings/iio/addac/adi,ad74413r.h new file mode 100644 index 000000000000..204f92bbd79f --- /dev/null +++ b/include/dt-bindings/iio/addac/adi,ad74413r.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _DT_BINDINGS_ADI_AD74413R_H +#define _DT_BINDINGS_ADI_AD74413R_H + +#define CH_FUNC_HIGH_IMPEDANCE 0x0 +#define CH_FUNC_VOLTAGE_OUTPUT 0x1 +#define CH_FUNC_CURRENT_OUTPUT 0x2 +#define CH_FUNC_VOLTAGE_INPUT 0x3 +#define CH_FUNC_CURRENT_INPUT_EXT_POWER 0x4 +#define CH_FUNC_CURRENT_INPUT_LOOP_POWER 0x5 +#define CH_FUNC_RESISTANCE_INPUT 0x6 +#define CH_FUNC_DIGITAL_INPUT_LOGIC 0x7 +#define CH_FUNC_DIGITAL_INPUT_LOOP_POWER 0x8 +#define CH_FUNC_CURRENT_INPUT_EXT_POWER_HART 0x9 +#define CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART 0xA + +#define CH_FUNC_MIN CH_FUNC_HIGH_IMPEDANCE +#define CH_FUNC_MAX CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART + +#endif /* _DT_BINDINGS_ADI_AD74413R_H */ -- cgit v1.2.3 From 9020ef659885f2622cfb386cc229b6d618362895 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 17 Oct 2021 18:22:09 +0100 Subject: iio: trigger: Fix a scheduling whilst atomic issue seen on tsc2046 IIO triggers are software IRQ chips that split an incoming IRQ into separate IRQs routed to all devices using the trigger. When all consumers are done then a trigger callback reenable() is called. There are a few circumstances under which this can happen in atomic context. 1) A single user of the trigger that calls the iio_trigger_done() function from interrupt context. 2) A race between disconnecting the last device from a trigger and the trigger itself sucessfully being disabled. To avoid a resulting scheduling whilst atomic, close this second corner by using schedule_work() to ensure the reenable is not done in atomic context. Note that drivers must be careful to manage the interaction of set_state() and reenable() callbacks to ensure appropriate reference counting if they are relying on the same hardware controls. Deliberately taking this the slow path rather than via a fixes tree because the error has hard to hit and I would like it to soak for a while before hitting a release kernel. Signed-off-by: Jonathan Cameron Cc: Pengutronix Kernel Team Cc: Dmitry Torokhov Tested-by: Oleksij Rempel Cc: Link: https://lore.kernel.org/r/20211017172209.112387-1-jic23@kernel.org --- include/linux/iio/trigger.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 096f68dd2e0c..4c69b144677b 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -55,6 +55,7 @@ struct iio_trigger_ops { * @attached_own_device:[INTERN] if we are using our own device as trigger, * i.e. if we registered a poll function to the same * device as the one providing the trigger. + * @reenable_work: [INTERN] work item used to ensure reenable can sleep. **/ struct iio_trigger { const struct iio_trigger_ops *ops; @@ -74,6 +75,7 @@ struct iio_trigger { unsigned long pool[BITS_TO_LONGS(CONFIG_IIO_CONSUMERS_PER_TRIGGER)]; struct mutex pool_lock; bool attached_own_device; + struct work_struct reenable_work; }; -- cgit v1.2.3 From 3ac27afefd5dd6a53e830542b899f092a58b6b51 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 5 Dec 2021 17:01:29 +0000 Subject: iio:dac:ad5755: Switch to generic firmware properties and drop pdata Lars pointed out that platform data can also be supported via the generic properties interface, so there is no point in continuing to support it separately. Hence squish the linux/platform_data/ad5755.h header into the c file and drop accessing the platform data directly. Done by inspection only. Mostly completely mechanical with the exception of a few places where default value handling is cleaner done by first setting the value, then calling the firmware reading function but and not checking the return value, as opposed to reading firmware then setting the default if an error occurs. Part of general attempt to move all of IIO over to generic device properties, both to enable other firmware types and to remove drivers that can be the source of of_ specific behaviour in new drivers. Suggested-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko --- include/linux/platform_data/ad5755.h | 102 ----------------------------------- 1 file changed, 102 deletions(-) delete mode 100644 include/linux/platform_data/ad5755.h (limited to 'include') diff --git a/include/linux/platform_data/ad5755.h b/include/linux/platform_data/ad5755.h deleted file mode 100644 index e371e08f04bc..000000000000 --- a/include/linux/platform_data/ad5755.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2012 Analog Devices Inc. - */ -#ifndef __LINUX_PLATFORM_DATA_AD5755_H__ -#define __LINUX_PLATFORM_DATA_AD5755_H__ - -enum ad5755_mode { - AD5755_MODE_VOLTAGE_0V_5V = 0, - AD5755_MODE_VOLTAGE_0V_10V = 1, - AD5755_MODE_VOLTAGE_PLUSMINUS_5V = 2, - AD5755_MODE_VOLTAGE_PLUSMINUS_10V = 3, - AD5755_MODE_CURRENT_4mA_20mA = 4, - AD5755_MODE_CURRENT_0mA_20mA = 5, - AD5755_MODE_CURRENT_0mA_24mA = 6, -}; - -enum ad5755_dc_dc_phase { - AD5755_DC_DC_PHASE_ALL_SAME_EDGE = 0, - AD5755_DC_DC_PHASE_A_B_SAME_EDGE_C_D_OPP_EDGE = 1, - AD5755_DC_DC_PHASE_A_C_SAME_EDGE_B_D_OPP_EDGE = 2, - AD5755_DC_DC_PHASE_90_DEGREE = 3, -}; - -enum ad5755_dc_dc_freq { - AD5755_DC_DC_FREQ_250kHZ = 0, - AD5755_DC_DC_FREQ_410kHZ = 1, - AD5755_DC_DC_FREQ_650kHZ = 2, -}; - -enum ad5755_dc_dc_maxv { - AD5755_DC_DC_MAXV_23V = 0, - AD5755_DC_DC_MAXV_24V5 = 1, - AD5755_DC_DC_MAXV_27V = 2, - AD5755_DC_DC_MAXV_29V5 = 3, -}; - -enum ad5755_slew_rate { - AD5755_SLEW_RATE_64k = 0, - AD5755_SLEW_RATE_32k = 1, - AD5755_SLEW_RATE_16k = 2, - AD5755_SLEW_RATE_8k = 3, - AD5755_SLEW_RATE_4k = 4, - AD5755_SLEW_RATE_2k = 5, - AD5755_SLEW_RATE_1k = 6, - AD5755_SLEW_RATE_500 = 7, - AD5755_SLEW_RATE_250 = 8, - AD5755_SLEW_RATE_125 = 9, - AD5755_SLEW_RATE_64 = 10, - AD5755_SLEW_RATE_32 = 11, - AD5755_SLEW_RATE_16 = 12, - AD5755_SLEW_RATE_8 = 13, - AD5755_SLEW_RATE_4 = 14, - AD5755_SLEW_RATE_0_5 = 15, -}; - -enum ad5755_slew_step_size { - AD5755_SLEW_STEP_SIZE_1 = 0, - AD5755_SLEW_STEP_SIZE_2 = 1, - AD5755_SLEW_STEP_SIZE_4 = 2, - AD5755_SLEW_STEP_SIZE_8 = 3, - AD5755_SLEW_STEP_SIZE_16 = 4, - AD5755_SLEW_STEP_SIZE_32 = 5, - AD5755_SLEW_STEP_SIZE_64 = 6, - AD5755_SLEW_STEP_SIZE_128 = 7, - AD5755_SLEW_STEP_SIZE_256 = 8, -}; - -/** - * struct ad5755_platform_data - AD5755 DAC driver platform data - * @ext_dc_dc_compenstation_resistor: Whether an external DC-DC converter - * compensation register is used. - * @dc_dc_phase: DC-DC converter phase. - * @dc_dc_freq: DC-DC converter frequency. - * @dc_dc_maxv: DC-DC maximum allowed boost voltage. - * @dac.mode: The mode to be used for the DAC output. - * @dac.ext_current_sense_resistor: Whether an external current sense resistor - * is used. - * @dac.enable_voltage_overrange: Whether to enable 20% voltage output overrange. - * @dac.slew.enable: Whether to enable digital slew. - * @dac.slew.rate: Slew rate of the digital slew. - * @dac.slew.step_size: Slew step size of the digital slew. - **/ -struct ad5755_platform_data { - bool ext_dc_dc_compenstation_resistor; - enum ad5755_dc_dc_phase dc_dc_phase; - enum ad5755_dc_dc_freq dc_dc_freq; - enum ad5755_dc_dc_maxv dc_dc_maxv; - - struct { - enum ad5755_mode mode; - bool ext_current_sense_resistor; - bool enable_voltage_overrange; - struct { - bool enable; - enum ad5755_slew_rate rate; - enum ad5755_slew_step_size step_size; - } slew; - } dac[4]; -}; - -#endif -- cgit v1.2.3 From fb6723daf89083a0d2290f3a0abc777e40766c84 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 29 May 2021 12:33:53 +0900 Subject: ALSA: pcm: comment about relation between msbits hw parameter and [S|U]32 formats Regarding to handling [U|S][32|24] PCM formats, many userspace application developers and driver developers have confusion, since they require them to understand justification or padding. It easily loses consistency and soundness to operate with many type of devices. In this commit, I attempt to solve the situation by adding comment about relation between [S|U]32 formats and 'msbits' hardware parameter. The formats are used for 'left-justified' sample format, and the available bit count in most significant bit is delivered to userspace in msbits hardware parameter (struct snd_pcm_hw_params.msbits), which is decided by msbits constraint added by pcm drivers (snd_pcm_hw_constraint_msbits()). In driver side, the msbits constraint includes two elements; the physical width of format and the available width of the format in most significant bit. The former is used to match SAMPLE_BITS of format. (For my convenience, I ignore wildcard in the usage of the constraint.) As a result of interaction between ALSA pcm core and ALSA pcm application, when the format in which SAMPLE_BITS equals to physical width of the msbits constaint, the msbits parameter is set by referring to the available width of the constraint. When the msbits parameter is not changed in the above process, ALSA pcm core set it alternatively with SAMPLE_BIT of chosen format. In userspace application side, the msbits is only available after calling ioctl(2) with SNDRV_PCM_IOCTL_HW_PARAMS request. Even if the hardware parameter structure includes somewhat value of SAMPLE_BITS interval parameter as width of format, all of the width is not always available since msbits can be less than the width. I note that [S|U]24 formats are used for 'right-justified' 24 bit sample formats within 32 bit frame. The first byte in most significant bit should be invalidated. Although the msbits exposed to userspace should be zero as invalid value, actually it is 32 from physical width of format. [ corrected typos -- tiwai ] Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210529033353.21641-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 3 +++ include/uapi/sound/asound.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 33451f8ff755..9b187d86e1bd 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -147,6 +147,9 @@ struct snd_pcm_ops { #define SNDRV_PCM_FMTBIT_S24_BE _SNDRV_PCM_FMTBIT(S24_BE) #define SNDRV_PCM_FMTBIT_U24_LE _SNDRV_PCM_FMTBIT(U24_LE) #define SNDRV_PCM_FMTBIT_U24_BE _SNDRV_PCM_FMTBIT(U24_BE) +// For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the +// available bit count in most significant bit. It's for the case of so-called 'left-justified' or +// `right-padding` sample which has less width than 32 bit. #define SNDRV_PCM_FMTBIT_S32_LE _SNDRV_PCM_FMTBIT(S32_LE) #define SNDRV_PCM_FMTBIT_S32_BE _SNDRV_PCM_FMTBIT(S32_BE) #define SNDRV_PCM_FMTBIT_U32_LE _SNDRV_PCM_FMTBIT(U32_LE) diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 5fbb79e30819..cf1d20e34167 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -202,6 +202,9 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */ +// For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the +// available bit count in most significant bit. It's for the case of so-called 'left-justified' or +// `right-padding` sample which has less width than 32 bit. #define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10) #define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11) #define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12) -- cgit v1.2.3 From 55b71f6c29f2a78af42dd453dfed895eba516cb4 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 13 Dec 2021 17:12:57 +0900 Subject: ALSA: uapi: use C90 comment style instead of C99 style UAPI headers are built with compiler option for C90, thus double-slashes comment introduced in C99 is not preferable. Fixes: fb6723daf890 ("ALSA: pcm: comment about relation between msbits hw parameter and [S|U]32 formats") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20211213081257.36097-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index cf1d20e34167..1834f58b8ede 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -202,9 +202,11 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */ -// For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the -// available bit count in most significant bit. It's for the case of so-called 'left-justified' or -// `right-padding` sample which has less width than 32 bit. +/* + * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the + * available bit count in most significant bit. It's for the case of so-called 'left-justified' or + * `right-padding` sample which has less width than 32 bit. + */ #define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10) #define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11) #define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12) -- cgit v1.2.3 From 9d9bcae47fd5a0b827521f65ab7d10a218eacc37 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 3 Dec 2021 11:28:44 +0100 Subject: ACPI: delay enumeration of devices with a _DEP pointing to an INT3472 device The clk and regulator frameworks expect clk/regulator consumer-devices to have info about the consumed clks/regulators described in the device's fw_node. To work around cases where this info is not present in the firmware tables, which is often the case on x86/ACPI devices, both frameworks allow the provider-driver to attach info about consumers to the clks/regulators when registering these. This causes problems with the probe ordering wrt drivers for consumers of these clks/regulators. Since the lookups are only registered when the provider-driver binds, trying to get these clks/regulators before then results in a -ENOENT error for clks and a dummy regulator for regulators. One case where we hit this issue is camera sensors such as e.g. the OV8865 sensor found on the Microsoft Surface Go. The sensor uses clks, regulators and GPIOs provided by a TPS68470 PMIC which is described in an INT3472 ACPI device. There is special platform code handling this and setting platform_data with the necessary consumer info on the MFD cells instantiated for the PMIC under: drivers/platform/x86/intel/int3472. For this to work properly the ov8865 driver must not bind to the I2C-client for the OV8865 sensor until after the TPS68470 PMIC gpio, regulator and clk MFD cells have all been fully setup. The OV8865 on the Microsoft Surface Go is just one example, all X86 devices using the Intel IPU3 camera block found on recent Intel SoCs have similar issues where there is an INT3472 HID ACPI-device, which describes the clks and regulators, and the driver for this INT3472 device must be fully initialized before the sensor driver (any sensor driver) binds for things to work properly. On these devices the ACPI nodes describing the sensors all have a _DEP dependency on the matching INT3472 ACPI device (there is one per sensor). This allows solving the probe-ordering problem by delaying the enumeration (instantiation of the I2C-client in the ov8865 example) of ACPI-devices which have a _DEP dependency on an INT3472 device. The new acpi_dev_ready_for_enumeration() helper used for this is also exported because for devices, which have the enumeration_by_parent flag set, the parent-driver will do its own scan of child ACPI devices and it will try to enumerate those during its probe(). Code doing this such as e.g. the i2c-core-acpi.c code must call this new helper to ensure that it too delays the enumeration until all the _DEP dependencies are met on devices which have the new honor_deps flag set. Acked-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211203102857.44539-2-hdegoede@redhat.com --- include/acpi/acpi_bus.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 480f9207a4c6..2f93ecf05dac 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -202,7 +202,8 @@ struct acpi_device_flags { u32 coherent_dma:1; u32 cca_seen:1; u32 enumeration_by_parent:1; - u32 reserved:19; + u32 honor_deps:1; + u32 reserved:18; }; /* File System */ @@ -285,6 +286,7 @@ struct acpi_dep_data { struct list_head node; acpi_handle supplier; acpi_handle consumer; + bool honor_dep; }; /* Performance Management */ @@ -693,6 +695,7 @@ static inline bool acpi_device_can_poweroff(struct acpi_device *adev) bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2); void acpi_dev_clear_dependencies(struct acpi_device *supplier); +bool acpi_dev_ready_for_enumeration(const struct acpi_device *device); struct acpi_device *acpi_dev_get_first_consumer_dev(struct acpi_device *supplier); struct acpi_device * acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const char *uid, s64 hrv); -- cgit v1.2.3 From c537be0bfad6337f2afd618fe252c03217191405 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 3 Dec 2021 11:28:46 +0100 Subject: i2c: acpi: Add i2c_acpi_new_device_by_fwnode() function Change i2c_acpi_new_device() into i2c_acpi_new_device_by_fwnode() and add a static inline wrapper providing the old i2c_acpi_new_device() behavior. This is necessary because in some cases we may only have access to the fwnode / acpi_device and not to the matching physical-node struct device *. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Wolfram Sang Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211203102857.44539-4-hdegoede@redhat.com --- include/linux/i2c.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 16119ac1aa97..7d4f52ceb7b5 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1025,8 +1025,9 @@ bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); u32 i2c_acpi_find_bus_speed(struct device *dev); -struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, - struct i2c_board_info *info); +struct i2c_client *i2c_acpi_new_device_by_fwnode(struct fwnode_handle *fwnode, + int index, + struct i2c_board_info *info); struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle); bool i2c_acpi_waive_d0_probe(struct device *dev); #else @@ -1043,8 +1044,9 @@ static inline u32 i2c_acpi_find_bus_speed(struct device *dev) { return 0; } -static inline struct i2c_client *i2c_acpi_new_device(struct device *dev, - int index, struct i2c_board_info *info) +static inline struct i2c_client *i2c_acpi_new_device_by_fwnode( + struct fwnode_handle *fwnode, int index, + struct i2c_board_info *info) { return ERR_PTR(-ENODEV); } @@ -1058,4 +1060,11 @@ static inline bool i2c_acpi_waive_d0_probe(struct device *dev) } #endif /* CONFIG_ACPI */ +static inline struct i2c_client *i2c_acpi_new_device(struct device *dev, + int index, + struct i2c_board_info *info) +{ + return i2c_acpi_new_device_by_fwnode(dev_fwnode(dev), index, info); +} + #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From 9dfa374cc6d04d2515adc21c39e356b64ee45a29 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 3 Dec 2021 11:28:47 +0100 Subject: platform_data: Add linux/platform_data/tps68470.h file The clk and regulator frameworks expect clk/regulator consumer-devices to have info about the consumed clks/regulators described in the device's fw_node. To work around cases where this info is not present in the firmware tables, which is often the case on x86/ACPI devices, both frameworks allow the provider-driver to attach info about consumers to the provider-device during probe/registration of the provider device. The TI TPS68470 PMIC is used x86/ACPI devices with the consumer-info missing from the ACPI tables. Thus the tps68470-clk and tps68470-regulator drivers must provide the consumer-info at probe time. Define tps68470_clk_platform_data and tps68470_regulator_platform_data structs to allow the x86 platform code to pass the necessary consumer info to these drivers. Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211203102857.44539-5-hdegoede@redhat.com --- include/linux/platform_data/tps68470.h | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/linux/platform_data/tps68470.h (limited to 'include') diff --git a/include/linux/platform_data/tps68470.h b/include/linux/platform_data/tps68470.h new file mode 100644 index 000000000000..126d082c3f2e --- /dev/null +++ b/include/linux/platform_data/tps68470.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * TI TPS68470 PMIC platform data definition. + * + * Copyright (c) 2021 Red Hat Inc. + * + * Red Hat authors: + * Hans de Goede + */ +#ifndef __PDATA_TPS68470_H +#define __PDATA_TPS68470_H + +enum tps68470_regulators { + TPS68470_CORE, + TPS68470_ANA, + TPS68470_VCM, + TPS68470_VIO, + TPS68470_VSIO, + TPS68470_AUX1, + TPS68470_AUX2, + TPS68470_NUM_REGULATORS +}; + +struct regulator_init_data; + +struct tps68470_regulator_platform_data { + const struct regulator_init_data *reg_init_data[TPS68470_NUM_REGULATORS]; +}; + +struct tps68470_clk_platform_data { + const char *consumer_dev_name; + const char *consumer_con_id; +}; + +#endif -- cgit v1.2.3 From e522ae91b8ff7bf89d22d9322308aba1a6326996 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sat, 4 Dec 2021 21:57:53 +0200 Subject: dt-bindings: soc: samsung: Add Exynos USI bindings Add constants for choosing USIv2 configuration mode in device tree. Those are further used in USI driver to figure out which value to write into SW_CONF register. Also document USIv2 IP-core bindings. Signed-off-by: Sam Protsenko Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211204195757.8600-2-semen.protsenko@linaro.org Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/soc/samsung,exynos-usi.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/dt-bindings/soc/samsung,exynos-usi.h (limited to 'include') diff --git a/include/dt-bindings/soc/samsung,exynos-usi.h b/include/dt-bindings/soc/samsung,exynos-usi.h new file mode 100644 index 000000000000..a01af169d249 --- /dev/null +++ b/include/dt-bindings/soc/samsung,exynos-usi.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021 Linaro Ltd. + * Author: Sam Protsenko + * + * Device Tree bindings for Samsung Exynos USI (Universal Serial Interface). + */ + +#ifndef __DT_BINDINGS_SAMSUNG_EXYNOS_USI_H +#define __DT_BINDINGS_SAMSUNG_EXYNOS_USI_H + +#define USI_V2_NONE 0 +#define USI_V2_UART 1 +#define USI_V2_SPI 2 +#define USI_V2_I2C 3 + +#endif /* __DT_BINDINGS_SAMSUNG_EXYNOS_USI_H */ -- cgit v1.2.3 From 614b7a1f28f401332736235a7d2d17ceda16945c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 10 Dec 2021 20:56:36 +0100 Subject: bareudp: Remove bareudp_dev_create() There's no user for this function. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/bareudp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/bareudp.h b/include/net/bareudp.h index dc65a0d71d9b..8f07a91e0f25 100644 --- a/include/net/bareudp.h +++ b/include/net/bareudp.h @@ -14,10 +14,6 @@ struct bareudp_conf { bool multi_proto_mode; }; -struct net_device *bareudp_dev_create(struct net *net, const char *name, - u8 name_assign_type, - struct bareudp_conf *info); - static inline bool netif_is_bareudp(const struct net_device *dev) { return dev->rtnl_link_ops && -- cgit v1.2.3 From dcdd77ee55a70b6ddfcfbe8e6bbf6d002994644c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 10 Dec 2021 20:56:39 +0100 Subject: bareudp: Move definition of struct bareudp_conf to bareudp.c This structure is used only in bareudp.c. While there, adjust include files: we need netdevice.h, not skbuff.h. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/bareudp.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/bareudp.h b/include/net/bareudp.h index 8f07a91e0f25..17610c8d6361 100644 --- a/include/net/bareudp.h +++ b/include/net/bareudp.h @@ -3,17 +3,10 @@ #ifndef __NET_BAREUDP_H #define __NET_BAREUDP_H +#include #include -#include #include -struct bareudp_conf { - __be16 ethertype; - __be16 port; - u16 sport_min; - bool multi_proto_mode; -}; - static inline bool netif_is_bareudp(const struct net_device *dev) { return dev->rtnl_link_ops && -- cgit v1.2.3 From 3c118547f87e930d45a5787e386734015dd93b32 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 10 Dec 2021 21:29:59 +0100 Subject: u64_stats: Disable preemption on 32bit UP+SMP PREEMPT_RT during updates. On PREEMPT_RT the seqcount_t for synchronisation is required on 32bit architectures even on UP because the softirq (and the threaded IRQ handler) can be preempted. With the seqcount_t for synchronisation, a reader with higher priority can preempt the writer and then spin endlessly in read_seqcount_begin() while the writer can't make progress. To avoid such a lock up on PREEMPT_RT the writer must disable preemption during the update. There is no need to disable interrupts because no writer is using this API in hard-IRQ context on PREEMPT_RT. Disable preemption on 32bit-RT within the u64_stats write section. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- include/linux/u64_stats_sync.h | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index e8ec116c916b..6ad4e9032d53 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -66,7 +66,7 @@ #include struct u64_stats_sync { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) seqcount_t seq; #endif }; @@ -125,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) #define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) #else static inline void u64_stats_init(struct u64_stats_sync *syncp) @@ -135,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp) static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); write_seqcount_begin(&syncp->seq); #endif } static inline void u64_stats_update_end(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); #endif } @@ -152,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) { unsigned long flags = 0; -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - local_irq_save(flags); +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); + else + local_irq_save(flags); write_seqcount_begin(&syncp->seq); #endif return flags; @@ -163,15 +170,18 @@ static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, unsigned long flags) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); - local_irq_restore(flags); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); + else + local_irq_restore(flags); #endif } static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_begin(&syncp->seq); #else return 0; @@ -180,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync * static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) preempt_disable(); #endif return __u64_stats_fetch_begin(syncp); @@ -189,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_retry(&syncp->seq, start); #else return false; @@ -199,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) preempt_enable(); #endif return __u64_stats_fetch_retry(syncp, start); @@ -213,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, */ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) + preempt_disable(); +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) local_irq_disable(); #endif return __u64_stats_fetch_begin(syncp); @@ -222,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) + preempt_enable(); +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) local_irq_enable(); #endif return __u64_stats_fetch_retry(syncp, start); -- cgit v1.2.3 From bae9401dff62d1ac46504a343db8a69e5ac390f6 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 23 Nov 2021 14:20:46 +0100 Subject: usb: Add Xen pvUSB protocol description Add the definition of pvUSB protocol used between the pvUSB frontend in a Xen domU and the pvUSB backend in a Xen driver domain (usually Dom0). This header was originally provided by Fujitsu for Xen based on Linux 2.6.18. Changes are: - adapt to Linux kernel style guide - use Xen namespace - add lots of comments - don't use kernel internal defines Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20211123132048.5335-2-jgross@suse.com Signed-off-by: Greg Kroah-Hartman --- include/xen/interface/io/usbif.h | 405 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 405 insertions(+) create mode 100644 include/xen/interface/io/usbif.h (limited to 'include') diff --git a/include/xen/interface/io/usbif.h b/include/xen/interface/io/usbif.h new file mode 100644 index 000000000000..a70e0b93178b --- /dev/null +++ b/include/xen/interface/io/usbif.h @@ -0,0 +1,405 @@ +/* SPDX-License-Identifier: MIT */ + +/* + * usbif.h + * + * USB I/O interface for Xen guest OSes. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu + */ + +#ifndef __XEN_PUBLIC_IO_USBIF_H__ +#define __XEN_PUBLIC_IO_USBIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Detailed Interface Description + * ============================== + * The pvUSB interface is using a split driver design: a frontend driver in + * the guest and a backend driver in a driver domain (normally dom0) having + * access to the physical USB device(s) being passed to the guest. + * + * The frontend and backend drivers use XenStore to initiate the connection + * between them, the I/O activity is handled via two shared ring pages and an + * event channel. As the interface between frontend and backend is at the USB + * host connector level, multiple (up to 31) physical USB devices can be + * handled by a single connection. + * + * The Xen pvUSB device name is "qusb", so the frontend's XenStore entries are + * to be found under "device/qusb", while the backend's XenStore entries are + * under "backend//qusb". + * + * When a new pvUSB connection is established, the frontend needs to setup the + * two shared ring pages for communication and the event channel. The ring + * pages need to be made available to the backend via the grant table + * interface. + * + * One of the shared ring pages is used by the backend to inform the frontend + * about USB device plug events (device to be added or removed). This is the + * "conn-ring". + * + * The other ring page is used for USB I/O communication (requests and + * responses). This is the "urb-ring". + * + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * num-ports + * Values: unsigned [1...31] + * + * Number of ports for this (virtual) USB host connector. + * + * usb-ver + * Values: unsigned [1...2] + * + * USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0. + * + * port/[1...31] + * Values: string + * + * Physical USB device connected to the given port, e.g. "3-1.5". + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: unsigned + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * urb-ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. This is the ring + * buffer for urb requests. + * + * conn-ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. This is the ring + * buffer for connection/disconnection requests. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + * + * Protocol Description + * ==================== + * + *-------------------------- USB device plug events -------------------------- + * + * USB device plug events are send via the "conn-ring" shared page. As only + * events are being sent, the respective requests from the frontend to the + * backend are just dummy ones. + * The events sent to the frontend have the following layout: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | portnum | speed | 4 + * +----------------+----------------+----------------+----------------+ + * id - uint16_t, event id (taken from the actual frontend dummy request) + * portnum - uint8_t, port number (1 ... 31) + * speed - uint8_t, device XENUSB_SPEED_*, XENUSB_SPEED_NONE == unplug + * + * The dummy request: + * 0 1 octet + * +----------------+----------------+ + * | id | 2 + * +----------------+----------------+ + * id - uint16_t, guest supplied value (no need for being unique) + * + *-------------------------- USB I/O request --------------------------------- + * + * A single USB I/O request on the "urb-ring" has the following layout: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | nr_buffer_segs | 4 + * +----------------+----------------+----------------+----------------+ + * | pipe | 8 + * +----------------+----------------+----------------+----------------+ + * | transfer_flags | buffer_length | 12 + * +----------------+----------------+----------------+----------------+ + * | request type specific | 16 + * | data | 20 + * +----------------+----------------+----------------+----------------+ + * | seg[0] | 24 + * | data | 28 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | seg[XENUSB_MAX_SEGMENTS_PER_REQUEST - 1] | 144 + * | data | 148 + * +----------------+----------------+----------------+----------------+ + * Bit field bit number 0 is always least significant bit, undefined bits must + * be zero. + * id - uint16_t, guest supplied value + * nr_buffer_segs - uint16_t, number of segment entries in seg[] array + * pipe - uint32_t, bit field with multiple information: + * bits 0-4: port request to send to + * bit 5: unlink request with specified id (cancel I/O) if set (see below) + * bit 7: direction (1 = read from device) + * bits 8-14: device number on port + * bits 15-18: endpoint of device + * bits 30-31: request type: 00 = isochronous, 01 = interrupt, + * 10 = control, 11 = bulk + * transfer_flags - uint16_t, bit field with processing flags: + * bit 0: less data than specified allowed + * buffer_length - uint16_t, total length of data + * request type specific data - 8 bytes, see below + * seg[] - array with 8 byte elements, see below + * + * Request type specific data for isochronous request: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | interval | start_frame | 4 + * +----------------+----------------+----------------+----------------+ + * | number_of_packets | nr_frame_desc_segs | 8 + * +----------------+----------------+----------------+----------------+ + * interval - uint16_t, time interval in msecs between frames + * start_frame - uint16_t, start frame number + * number_of_packets - uint16_t, number of packets to transfer + * nr_frame_desc_segs - uint16_t number of seg[] frame descriptors elements + * + * Request type specific data for interrupt request: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | interval | 0 | 4 + * +----------------+----------------+----------------+----------------+ + * | 0 | 8 + * +----------------+----------------+----------------+----------------+ + * interval - uint16_t, time in msecs until interruption + * + * Request type specific data for control request: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | data of setup packet | 4 + * | | 8 + * +----------------+----------------+----------------+----------------+ + * + * Request type specific data for bulk request: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | 0 | 4 + * | 0 | 8 + * +----------------+----------------+----------------+----------------+ + * + * Request type specific data for unlink request: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | unlink_id | 0 | 4 + * +----------------+----------------+----------------+----------------+ + * | 0 | 8 + * +----------------+----------------+----------------+----------------+ + * unlink_id - uint16_t, request id of request to terminate + * + * seg[] array element layout: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | gref | 4 + * +----------------+----------------+----------------+----------------+ + * | offset | length | 8 + * +----------------+----------------+----------------+----------------+ + * gref - uint32_t, grant reference of buffer page + * offset - uint16_t, offset of buffer start in page + * length - uint16_t, length of buffer in page + * + *-------------------------- USB I/O response -------------------------------- + * + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | start_frame | 4 + * +----------------+----------------+----------------+----------------+ + * | status | 8 + * +----------------+----------------+----------------+----------------+ + * | actual_length | 12 + * +----------------+----------------+----------------+----------------+ + * | error_count | 16 + * +----------------+----------------+----------------+----------------+ + * id - uint16_t, id of the request this response belongs to + * start_frame - uint16_t, start_frame this response (iso requests only) + * status - int32_t, XENUSB_STATUS_* (non-iso requests) + * actual_length - uint32_t, actual size of data transferred + * error_count - uint32_t, number of errors (iso requests) + */ + +enum xenusb_spec_version { + XENUSB_VER_UNKNOWN = 0, + XENUSB_VER_USB11, + XENUSB_VER_USB20, + XENUSB_VER_USB30, /* not supported yet */ +}; + +/* + * USB pipe in xenusb_request + * + * - port number: bits 0-4 + * (USB_MAXCHILDREN is 31) + * + * - operation flag: bit 5 + * (0 = submit urb, + * 1 = unlink urb) + * + * - direction: bit 7 + * (0 = Host-to-Device [Out] + * 1 = Device-to-Host [In]) + * + * - device address: bits 8-14 + * + * - endpoint: bits 15-18 + * + * - pipe type: bits 30-31 + * (00 = isochronous, 01 = interrupt, + * 10 = control, 11 = bulk) + */ + +#define XENUSB_PIPE_PORT_MASK 0x0000001f +#define XENUSB_PIPE_UNLINK 0x00000020 +#define XENUSB_PIPE_DIR 0x00000080 +#define XENUSB_PIPE_DEV_MASK 0x0000007f +#define XENUSB_PIPE_DEV_SHIFT 8 +#define XENUSB_PIPE_EP_MASK 0x0000000f +#define XENUSB_PIPE_EP_SHIFT 15 +#define XENUSB_PIPE_TYPE_MASK 0x00000003 +#define XENUSB_PIPE_TYPE_SHIFT 30 +#define XENUSB_PIPE_TYPE_ISOC 0 +#define XENUSB_PIPE_TYPE_INT 1 +#define XENUSB_PIPE_TYPE_CTRL 2 +#define XENUSB_PIPE_TYPE_BULK 3 + +#define xenusb_pipeportnum(pipe) ((pipe) & XENUSB_PIPE_PORT_MASK) +#define xenusb_setportnum_pipe(pipe, portnum) ((pipe) | (portnum)) + +#define xenusb_pipeunlink(pipe) ((pipe) & XENUSB_PIPE_UNLINK) +#define xenusb_pipesubmit(pipe) (!xenusb_pipeunlink(pipe)) +#define xenusb_setunlink_pipe(pipe) ((pipe) | XENUSB_PIPE_UNLINK) + +#define xenusb_pipein(pipe) ((pipe) & XENUSB_PIPE_DIR) +#define xenusb_pipeout(pipe) (!xenusb_pipein(pipe)) + +#define xenusb_pipedevice(pipe) \ + (((pipe) >> XENUSB_PIPE_DEV_SHIFT) & XENUSB_PIPE_DEV_MASK) + +#define xenusb_pipeendpoint(pipe) \ + (((pipe) >> XENUSB_PIPE_EP_SHIFT) & XENUSB_PIPE_EP_MASK) + +#define xenusb_pipetype(pipe) \ + (((pipe) >> XENUSB_PIPE_TYPE_SHIFT) & XENUSB_PIPE_TYPE_MASK) +#define xenusb_pipeisoc(pipe) (xenusb_pipetype(pipe) == XENUSB_PIPE_TYPE_ISOC) +#define xenusb_pipeint(pipe) (xenusb_pipetype(pipe) == XENUSB_PIPE_TYPE_INT) +#define xenusb_pipectrl(pipe) (xenusb_pipetype(pipe) == XENUSB_PIPE_TYPE_CTRL) +#define xenusb_pipebulk(pipe) (xenusb_pipetype(pipe) == XENUSB_PIPE_TYPE_BULK) + +#define XENUSB_MAX_SEGMENTS_PER_REQUEST (16) +#define XENUSB_MAX_PORTNR 31 +#define XENUSB_RING_SIZE 4096 + +/* + * RING for transferring urbs. + */ +struct xenusb_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; + +struct xenusb_urb_request { + uint16_t id; /* request id */ + uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ + + /* basic urb parameter */ + uint32_t pipe; + uint16_t transfer_flags; +#define XENUSB_SHORT_NOT_OK 0x0001 + uint16_t buffer_length; + union { + uint8_t ctrl[8]; /* setup_packet (Ctrl) */ + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t start_frame; /* start frame */ + uint16_t number_of_packets; /* number of ISO packet */ + uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */ + } isoc; + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t pad[3]; + } intr; + + struct { + uint16_t unlink_id; /* unlink request id */ + uint16_t pad[3]; + } unlink; + + } u; + + /* urb data segments */ + struct xenusb_request_segment seg[XENUSB_MAX_SEGMENTS_PER_REQUEST]; +}; + +struct xenusb_urb_response { + uint16_t id; /* request id */ + uint16_t start_frame; /* start frame (ISO) */ + int32_t status; /* status (non-ISO) */ +#define XENUSB_STATUS_OK 0 +#define XENUSB_STATUS_NODEV (-19) +#define XENUSB_STATUS_INVAL (-22) +#define XENUSB_STATUS_STALL (-32) +#define XENUSB_STATUS_IOERROR (-71) +#define XENUSB_STATUS_BABBLE (-75) +#define XENUSB_STATUS_SHUTDOWN (-108) + int32_t actual_length; /* actual transfer length */ + int32_t error_count; /* number of ISO errors */ +}; + +DEFINE_RING_TYPES(xenusb_urb, struct xenusb_urb_request, struct xenusb_urb_response); +#define XENUSB_URB_RING_SIZE __CONST_RING_SIZE(xenusb_urb, XENUSB_RING_SIZE) + +/* + * RING for notifying connect/disconnect events to frontend + */ +struct xenusb_conn_request { + uint16_t id; +}; + +struct xenusb_conn_response { + uint16_t id; /* request id */ + uint8_t portnum; /* port number */ + uint8_t speed; /* usb_device_speed */ +#define XENUSB_SPEED_NONE 0 +#define XENUSB_SPEED_LOW 1 +#define XENUSB_SPEED_FULL 2 +#define XENUSB_SPEED_HIGH 3 +}; + +DEFINE_RING_TYPES(xenusb_conn, struct xenusb_conn_request, struct xenusb_conn_response); +#define XENUSB_CONN_RING_SIZE __CONST_RING_SIZE(xenusb_conn, XENUSB_RING_SIZE) + +#endif /* __XEN_PUBLIC_IO_USBIF_H__ */ -- cgit v1.2.3 From 4bc5e64e6cf37007e436970024e5998ee0935651 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 26 Nov 2021 01:13:32 +0100 Subject: efi: Move efifb_setup_from_dmi() prototype from arch headers Commit 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") made the Generic System Framebuffers (sysfb) driver able to be built on non-x86 architectures. But it left the efifb_setup_from_dmi() function prototype declaration in the architecture specific headers. This could lead to the following compiler warning as reported by the kernel test robot: drivers/firmware/efi/sysfb_efi.c:70:6: warning: no previous prototype for function 'efifb_setup_from_dmi' [-Wmissing-prototypes] void efifb_setup_from_dmi(struct screen_info *si, const char *opt) ^ drivers/firmware/efi/sysfb_efi.c:70:1: note: declare 'static' if the function is not intended to be used outside of this translation unit void efifb_setup_from_dmi(struct screen_info *si, const char *opt) Fixes: 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") Reported-by: kernel test robot Cc: # 5.15.x Signed-off-by: Javier Martinez Canillas Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20211126001333.555514-1-javierm@redhat.com Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index dbd39b20e034..ef8dbc0a1522 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1283,4 +1283,10 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find( } #endif +#ifdef CONFIG_SYSFB +extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); +#else +static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { } +#endif + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From f92c1e183604c20ce00eb889315fdaa8f2d9e509 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Dec 2021 20:32:44 +0100 Subject: bpf: Add get_func_[arg|ret|arg_cnt] helpers Adding following helpers for tracing programs: Get n-th argument of the traced function: long bpf_get_func_arg(void *ctx, u32 n, u64 *value) Get return value of the traced function: long bpf_get_func_ret(void *ctx, u64 *value) Get arguments count of the traced function: long bpf_get_func_arg_cnt(void *ctx) The trampoline now stores number of arguments on ctx-8 address, so it's easy to verify argument index and find return value argument's position. Moving function ip address on the trampoline stack behind the number of functions arguments, so it's now stored on ctx-16 address if it's needed. All helpers above are inlined by verifier. Also bit unrelated small change - using newly added function bpf_prog_has_trampoline in check_get_func_ip. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211208193245.172141-5-jolsa@kernel.org --- include/linux/bpf.h | 5 +++++ include/uapi/linux/bpf.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7a40022e3d00..965fffaf0308 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -777,6 +777,7 @@ void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); int bpf_jit_charge_modmem(u32 pages); void bpf_jit_uncharge_modmem(u32 pages); +bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) @@ -805,6 +806,10 @@ static inline bool is_bpf_image_address(unsigned long address) { return false; } +static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) +{ + return false; +} #endif struct bpf_func_info_aux { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2820c77e4846..b0383d371b9a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4993,6 +4993,31 @@ union bpf_attr { * An integer less than, equal to, or greater than zero * if the first **s1_sz** bytes of **s1** is found to be * less than, to match, or be greater than **s2**. + * + * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) + * Description + * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * returned in **value**. + * + * Return + * 0 on success. + * **-EINVAL** if n >= arguments count of traced function. + * + * long bpf_get_func_ret(void *ctx, u64 *value) + * Description + * Get return value of the traced function (for tracing programs) + * in **value**. + * + * Return + * 0 on success. + * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. + * + * long bpf_get_func_arg_cnt(void *ctx) + * Description + * Get number of arguments of the traced function (for tracing programs). + * + * Return + * The number of arguments of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5178,6 +5203,9 @@ union bpf_attr { FN(find_vma), \ FN(loop), \ FN(strncmp), \ + FN(get_func_arg), \ + FN(get_func_ret), \ + FN(get_func_arg_cnt), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From beba81faad86fc2bad567b1c029d6a000a43ca78 Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Tue, 7 Dec 2021 13:39:01 +0530 Subject: dt-bindings: pinctrl: k3: Introduce pinmux definitions for J721S2 Add pinctrl macros for J721S2 SoC. These macro definitions are similar to that of J721E, but adding new definitions to avoid any naming confusions in the soc dts files. checkpatch insists the following error exists: ERROR: Macros with complex values should be enclosed in parentheses However, we do not need parentheses enclosing the values for this macro as we do intend it to generate two separate values as has been done for other similar platforms. Signed-off-by: Aswath Govindraju Signed-off-by: Vignesh Raghavendra Reviewed-by: Kishon Vijay Abraham I Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211207080904.14324-3-a-govindraju@ti.com --- include/dt-bindings/pinctrl/k3.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/pinctrl/k3.h b/include/dt-bindings/pinctrl/k3.h index e085f102b283..63e038e36ca3 100644 --- a/include/dt-bindings/pinctrl/k3.h +++ b/include/dt-bindings/pinctrl/k3.h @@ -38,4 +38,7 @@ #define AM64X_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) #define AM64X_MCU_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) +#define J721S2_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) +#define J721S2_WKUP_IOPAD(pa, val, muxmode) (((pa) & 0x1fff)) ((val) | (muxmode)) + #endif -- cgit v1.2.3 From 0e25498f8cd43c1b5aa327f373dd094e9a006da7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 28 Jun 2021 14:52:01 -0500 Subject: exit: Add and use make_task_dead. There are two big uses of do_exit. The first is it's design use to be the guts of the exit(2) system call. The second use is to terminate a task after something catastrophic has happened like a NULL pointer in kernel code. Add a function make_task_dead that is initialy exactly the same as do_exit to cover the cases where do_exit is called to handle catastrophic failure. In time this can probably be reduced to just a light wrapper around do_task_dead. For now keep it exactly the same so that there will be no behavioral differences introducing this new concept. Replace all of the uses of do_exit that use it for catastraphic task cleanup with make_task_dead to make it clear what the code is doing. As part of this rename rewind_stack_do_exit rewind_stack_and_make_dead. Signed-off-by: "Eric W. Biederman" --- include/linux/sched/task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ba88a6987400..2d4bbd9c3278 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -59,6 +59,7 @@ extern void sched_post_fork(struct task_struct *p, extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); +void __noreturn make_task_dead(int signr); extern void proc_caches_init(void); -- cgit v1.2.3 From bbda86e988d4c124e4cfa816291cbd583ae8bfb1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Nov 2021 10:27:36 -0600 Subject: exit: Implement kthread_exit The way the per task_struct exit_code is used by kernel threads is not quite compatible how it is used by userspace applications. The low byte of the userspace exit_code value encodes the exit signal. While kthreads just use the value as an int holding ordinary kernel function exit status like -EPERM. Add kthread_exit to clearly separate the two kinds of uses. Signed-off-by: "Eric W. Biederman" --- include/linux/kthread.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 346b0f269161..22c43d419687 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -70,6 +70,7 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); +void kthread_exit(long result) __noreturn; int kthreadd(void *unused); extern struct task_struct *kthreadd_task; -- cgit v1.2.3 From ca3574bd653aba234a4b31955f2778947403be16 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Dec 2021 11:00:19 -0600 Subject: exit: Rename module_put_and_exit to module_put_and_kthread_exit Update module_put_and_exit to call kthread_exit instead of do_exit. Change the name to reflect this change in functionality. All of the users of module_put_and_exit are causing the current kthread to exit so this change makes it clear what is happening. There is no functional change. Signed-off-by: "Eric W. Biederman" --- include/linux/module.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index c9f1200b2312..f03be97e9ec1 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -595,9 +595,9 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); -extern void __noreturn __module_put_and_exit(struct module *mod, +extern void __noreturn __module_put_and_kthread_exit(struct module *mod, long code); -#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code) +#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD int module_refcount(struct module *mod); @@ -790,7 +790,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb) return 0; } -#define module_put_and_exit(code) do_exit(code) +#define module_put_and_kthread_exit(code) kthread_exit(code) static inline void print_modules(void) { -- cgit v1.2.3 From cead18552660702a4a46f58e65188fe5f36e9dfe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Nov 2021 11:15:19 -0600 Subject: exit: Rename complete_and_exit to kthread_complete_and_exit Update complete_and_exit to call kthread_exit instead of do_exit. Change the name to reflect this change in functionality. All of the users of complete_and_exit are causing the current kthread to exit so this change makes it clear what is happening. Move the implementation of kthread_complete_and_exit from kernel/exit.c to to kernel/kthread.c. As this function is kthread specific it makes most sense to live with the kthread functions. There are no functional change. Signed-off-by: "Eric W. Biederman" --- include/linux/kernel.h | 1 - include/linux/kthread.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 77755ac3e189..055eb203c00e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -187,7 +187,6 @@ static inline void might_fault(void) { } #endif void do_exit(long error_code) __noreturn; -void complete_and_exit(struct completion *, long) __noreturn; extern int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 22c43d419687..d86a7e3b9a52 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -71,6 +71,7 @@ int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); void kthread_exit(long result) __noreturn; +void kthread_complete_and_exit(struct completion *, long) __noreturn; int kthreadd(void *unused); extern struct task_struct *kthreadd_task; -- cgit v1.2.3 From 40966e316f86b8cfd83abd31ccb4df729309d3e7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Dec 2021 09:56:14 -0600 Subject: kthread: Ensure struct kthread is present for all kthreads Today the rules are a bit iffy and arbitrary about which kernel threads have struct kthread present. Both idle threads and thread started with create_kthread want struct kthread present so that is effectively all kernel threads. Make the rule that if PF_KTHREAD and the task is running then struct kthread is present. This will allow the kernel thread code to using tsk->exit_code with different semantics from ordinary processes. To make ensure that struct kthread is present for all kernel threads move it's allocation into copy_process. Add a deallocation of struct kthread in exec for processes that were kernel threads. Move the allocation of struct kthread for the initial thread earlier so that it is not repeated for each additional idle thread. Move the initialization of struct kthread into set_kthread_struct so that the structure is always and reliably initailized. Clear set_child_tid in free_kthread_struct to ensure the kthread struct is reliably freed during exec. The function free_kthread_struct does not need to clear vfork_done during exec as exec_mm_release called from exec_mmap has already cleared vfork_done. Signed-off-by: "Eric W. Biederman" --- include/linux/kthread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index d86a7e3b9a52..4f3433afb54b 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -33,7 +33,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), unsigned int cpu, const char *namefmt); -void set_kthread_struct(struct task_struct *p); +bool set_kthread_struct(struct task_struct *p); void kthread_set_per_cpu(struct task_struct *k, int cpu); bool kthread_is_per_cpu(struct task_struct *k); -- cgit v1.2.3 From df5e49c880ea0776806b8a9f8ab95e035272cf6f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC: change svc_get() to return the svc. It is common for 'get' functions to return the object that was 'got', and there are a couple of places where users of svc_get() would be a little simpler if svc_get() did that. Make it so. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 0ae28ae6caf2..5d9568953fcd 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -120,9 +120,10 @@ struct svc_serv { * change the number of threads. Horrible, but there it is. * Should be called with the "service mutex" held. */ -static inline void svc_get(struct svc_serv *serv) +static inline struct svc_serv *svc_get(struct svc_serv *serv) { serv->sv_nrthreads++; + return serv; } /* -- cgit v1.2.3 From 8c62d12740a1450d2e8456d5747f440e10db281a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC/NFSD: clean up get/put functions. svc_destroy() is poorly named - it doesn't necessarily destroy the svc, it might just reduce the ref count. nfsd_destroy() is poorly named for the same reason. This patch: - removes the refcount functionality from svc_destroy(), moving it to a new svc_put(). Almost all previous callers of svc_destroy() now call svc_put(). - renames nfsd_destroy() to nfsd_put() and improves the code, using the new svc_destroy() rather than svc_put() - removes a few comments that explain the important for balanced get/put calls. This should be obvious. The only non-trivial part of this is that svc_destroy() would call svc_sock_update() on a non-final decrement. It can no longer do that, and svc_put() isn't really a good place of it. This call is now made from svc_exit_thread() which seems like a good place. This makes the call *before* sv_nrthreads is decremented rather than after. This is not particularly important as the call just sets a flag which causes sv_nrthreads set be checked later. A subsequent patch will improve the ordering. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5d9568953fcd..73d56d33a36d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -114,8 +114,13 @@ struct svc_serv { #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; -/* - * We use sv_nrthreads as a reference count. svc_destroy() drops +/** + * svc_get() - increment reference count on a SUNRPC serv + * @serv: the svc_serv to have count incremented + * + * Returns: the svc_serv that was passed in. + * + * We use sv_nrthreads as a reference count. svc_put() drops * this refcount, so we need to bump it up around operations that * change the number of threads. Horrible, but there it is. * Should be called with the "service mutex" held. @@ -126,6 +131,22 @@ static inline struct svc_serv *svc_get(struct svc_serv *serv) return serv; } +void svc_destroy(struct svc_serv *serv); + +/** + * svc_put - decrement reference count on a SUNRPC serv + * @serv: the svc_serv to have count decremented + * + * When the reference count reaches zero, svc_destroy() + * is called to clean up and free the serv. + */ +static inline void svc_put(struct svc_serv *serv) +{ + serv->sv_nrthreads -= 1; + if (serv->sv_nrthreads == 0) + svc_destroy(serv); +} + /* * Maximum payload size supported by a kernel RPC server. * This is use to determine the max number of pages nfsd is @@ -515,7 +536,6 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -void svc_destroy(struct svc_serv *); void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, -- cgit v1.2.3 From ec52361df99b490f6af412b046df9799b92c1050 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC: stop using ->sv_nrthreads as a refcount The use of sv_nrthreads as a general refcount results in clumsy code, as is seen by various comments needed to explain the situation. This patch introduces a 'struct kref' and uses that for reference counting, leaving sv_nrthreads to be a pure count of threads. The kref is managed particularly in svc_get() and svc_put(), and also nfsd_put(); svc_destroy() now takes a pointer to the embedded kref, rather than to the serv. nfsd allows the svc_serv to exist with ->sv_nrhtreads being zero. This happens when a transport is created before the first thread is started. To support this, a 'keep_active' flag is introduced which holds a ref on the svc_serv. This is set when any listening socket is successfully added (unless there are running threads), and cleared when the number of threads is set. So when the last thread exits, the nfs_serv will be destroyed. The use of 'keep_active' replaces previous code which checked if there were any permanent sockets. We no longer clear ->rq_server when nfsd() exits. This was done to prevent svc_exit_thread() from calling svc_destroy(). Instead we take an extra reference to the svc_serv to prevent svc_destroy() from being called. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 73d56d33a36d..3903b4ae8ac5 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -85,6 +85,7 @@ struct svc_serv { struct svc_program * sv_program; /* RPC program */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; + struct kref sv_refcnt; unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -119,19 +120,14 @@ struct svc_serv { * @serv: the svc_serv to have count incremented * * Returns: the svc_serv that was passed in. - * - * We use sv_nrthreads as a reference count. svc_put() drops - * this refcount, so we need to bump it up around operations that - * change the number of threads. Horrible, but there it is. - * Should be called with the "service mutex" held. */ static inline struct svc_serv *svc_get(struct svc_serv *serv) { - serv->sv_nrthreads++; + kref_get(&serv->sv_refcnt); return serv; } -void svc_destroy(struct svc_serv *serv); +void svc_destroy(struct kref *); /** * svc_put - decrement reference count on a SUNRPC serv @@ -142,9 +138,7 @@ void svc_destroy(struct svc_serv *serv); */ static inline void svc_put(struct svc_serv *serv) { - serv->sv_nrthreads -= 1; - if (serv->sv_nrthreads == 0) - svc_destroy(serv); + kref_put(&serv->sv_refcnt, svc_destroy); } /* -- cgit v1.2.3 From 3409e4f1e8f239f0ed81be0b068ecf4e73e2e826 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: NFSD: Make it possible to use svc_set_num_threads_sync nfsd cannot currently use svc_set_num_threads_sync. It instead uses svc_set_num_threads which does *not* wait for threads to all exit, and has a separate mechanism (nfsd_shutdown_complete) to wait for completion. The reason that nfsd is unlike other services is that nfsd threads can exit separately from svc_set_num_threads being called - they die on receipt of SIGKILL. Also, when the last thread exits, the service must be shut down (sockets closed). For this, the nfsd_mutex needs to be taken, and as that mutex needs to be held while svc_set_num_threads is called, the one cannot wait for the other. This patch changes the nfsd thread so that it can drop the ref on the service without blocking on nfsd_mutex, so that svc_set_num_threads_sync can be used: - if it can drop a non-last reference, it does that. This does not trigger shutdown and does not require a mutex. This will likely happen for all but the last thread signalled, and for all threads being shut down by nfsd_shutdown_threads() - if it can get the mutex without blocking (trylock), it does that and then drops the reference. This will likely happen for the last thread killed by SIGKILL - Otherwise there might be an unrelated task holding the mutex, possibly in another network namespace, or nfsd_shutdown_threads() might be just about to get a reference on the service, after which we can drop ours safely. We cannot conveniently get wakeup notifications on these events, and we are unlikely to need to, so we sleep briefly and check again. With this we can discard nfsd_shutdown_complete and nfsd_complete_shutdown(), and switch to svc_set_num_threads_sync. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3903b4ae8ac5..36bfc0281988 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -141,6 +141,19 @@ static inline void svc_put(struct svc_serv *serv) kref_put(&serv->sv_refcnt, svc_destroy); } +/** + * svc_put_not_last - decrement non-final reference count on SUNRPC serv + * @serv: the svc_serv to have count decremented + * + * Returns: %true is refcount was decremented. + * + * If the refcount is 1, it is not decremented and instead failure is reported. + */ +static inline bool svc_put_not_last(struct svc_serv *serv) +{ + return refcount_dec_not_one(&serv->sv_refcnt.refcount); +} + /* * Maximum payload size supported by a kernel RPC server. * This is use to determine the max number of pages nfsd is -- cgit v1.2.3 From 3ebdbe5203a874614819700d3f470724cb803709 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC: discard svo_setup and rename svc_set_num_threads_sync() The ->svo_setup callback serves no purpose. It is always called from within the same module that chooses which callback is needed. So discard it and call the relevant function directly. Now that svc_set_num_threads() is no longer used remove it and rename svc_set_num_threads_sync() to remove the "_sync" suffix. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 36bfc0281988..0b38c6eaf985 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -64,9 +64,6 @@ struct svc_serv_ops { /* queue up a transport for servicing */ void (*svo_enqueue_xprt)(struct svc_xprt *); - /* set up thread (or whatever) execution context */ - int (*svo_setup)(struct svc_serv *, struct svc_pool *, int); - /* optional module to count when adding threads (pooled svcs only) */ struct module *svo_module; }; @@ -541,7 +538,6 @@ void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); -- cgit v1.2.3 From cf0e124e0a489944d08fcc3c694d2b234d2cc658 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: SUNRPC: move the pool_map definitions (back) into svc.c These definitions are not used outside of svc.c, and there is no evidence that they ever have been. So move them into svc.c and make the declarations 'static'. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 0b38c6eaf985..d69e6108cb83 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -494,29 +494,6 @@ struct svc_procedure { const char * pc_name; /* for display */ }; -/* - * Mode for mapping cpus to pools. - */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; - -struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -}; - -extern struct svc_pool_map svc_pool_map; - /* * Function prototypes. */ @@ -533,8 +510,6 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); -unsigned int svc_pool_map_get(void); -void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -- cgit v1.2.3 From 6b044fbaab02292fedb17565dbb3f2528083b169 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Nov 2021 15:51:25 +1100 Subject: lockd: use svc_set_num_threads() for thread start and stop svc_set_num_threads() does everything that lockd_start_svc() does, except set sv_maxconn. It also (when passed 0) finds the threads and stops them with kthread_stop(). So move the setting for sv_maxconn, and use svc_set_num_thread() We now don't need nlmsvc_task. Now that we use svc_set_num_threads() it makes sense to set svo_module. This request that the thread exists with module_put_and_exit(). Also fix the documentation for svo_module to make this explicit. svc_prepare_thread is now only used where it is defined, so it can be made static. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d69e6108cb83..cf175d47c6b7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -64,7 +64,9 @@ struct svc_serv_ops { /* queue up a transport for servicing */ void (*svo_enqueue_xprt)(struct svc_xprt *); - /* optional module to count when adding threads (pooled svcs only) */ + /* optional module to count when adding threads. + * Thread function must call module_put_and_exit() to exit. + */ struct module *svo_module; }; @@ -504,8 +506,6 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int, const struct svc_serv_ops *); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); -struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, - struct svc_pool *pool, int node); void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_free(struct svc_rqst *); -- cgit v1.2.3 From 5089f3d97552b0b07101e02a3fca0146b9b9d3b5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Oct 2021 15:17:21 -0400 Subject: SUNRPC: Remove low signal-to-noise tracepoints I'm about to add more information to the server-side SUNRPC tracepoints, so I'm going to offset the increased trace log consumption by getting rid of some tracepoints that fire frequently but don't offer much value. trace_svc_xprt_received() was useful for debugging, perhaps, but is not generally informative. trace_svc_handle_xprt() reports largely the same information as trace_svc_xdr_recvfrom(). As a clean-up, rename trace_svc_xprt_do_enqueue() to match svc_xprt_dequeue(). Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 3a99358c262b..684cc0e322fa 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1768,7 +1768,7 @@ TRACE_EVENT(svc_xprt_create_err, __entry->error) ); -TRACE_EVENT(svc_xprt_do_enqueue, +TRACE_EVENT(svc_xprt_enqueue, TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), TP_ARGS(xprt, rqst), @@ -1815,7 +1815,6 @@ DECLARE_EVENT_CLASS(svc_xprt_event, ), \ TP_ARGS(xprt)) -DEFINE_SVC_XPRT_EVENT(received); DEFINE_SVC_XPRT_EVENT(no_write_space); DEFINE_SVC_XPRT_EVENT(close); DEFINE_SVC_XPRT_EVENT(detach); @@ -1902,27 +1901,6 @@ TRACE_EVENT(svc_alloc_arg_err, TP_printk("pages=%u", __entry->pages) ); -TRACE_EVENT(svc_handle_xprt, - TP_PROTO(struct svc_xprt *xprt, int len), - - TP_ARGS(xprt, len), - - TP_STRUCT__entry( - __field(int, len) - __field(unsigned long, flags) - __string(addr, xprt->xpt_remotebuf) - ), - - TP_fast_assign( - __entry->len = len; - __entry->flags = xprt->xpt_flags; - __assign_str(addr, xprt->xpt_remotebuf); - ), - - TP_printk("addr=%s len=%d flags=%s", __get_str(addr), - __entry->len, show_svc_xprt_flags(__entry->flags)) -); - TRACE_EVENT(svc_stats_latency, TP_PROTO(const struct svc_rqst *rqst), -- cgit v1.2.3 From c8064e5b4adac5e1255cf4f3b374e75b5376e7ca Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Nov 2021 11:08:07 +0100 Subject: bpf: Let bpf_warn_invalid_xdp_action() report more info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In non trivial scenarios, the action id alone is not sufficient to identify the program causing the warning. Before the previous patch, the generated stack-trace pointed out at least the involved device driver. Let's additionally include the program name and id, and the relevant device name. If the user needs additional infos, he can fetch them via a kernel probe, leveraging the arguments added here. Signed-off-by: Paolo Abeni Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/ddb96bb975cbfddb1546cf5da60e77d5100b533c.1638189075.git.pabeni@redhat.com --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 68c6b5c208e7..60eec80fa1d4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1027,7 +1027,7 @@ void xdp_do_flush(void); */ #define xdp_do_flush_map xdp_do_flush -void bpf_warn_invalid_xdp_action(u32 act); +void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, -- cgit v1.2.3 From d27a662290963a1cde26cdfdbac71a546c06e94a Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 10 Dec 2021 18:15:11 +0100 Subject: xsk: Wipe out dead zero_copy_allocator declarations zero_copy_allocator has been removed back when Bjorn Topel introduced xsk_buff_pool. Remove references to it that were dangling in the tree. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20211210171511.11574-1-maciej.fijalkowski@intel.com --- include/net/xdp_priv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h index a9d5b7603b89..a2d58b1a12e1 100644 --- a/include/net/xdp_priv.h +++ b/include/net/xdp_priv.h @@ -10,7 +10,6 @@ struct xdp_mem_allocator { union { void *allocator; struct page_pool *page_pool; - struct zero_copy_allocator *zc_alloc; }; struct rhash_head node; struct rcu_head rcu; -- cgit v1.2.3 From 22c3f2f56bd9c901e1da69040680c311f310635d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 1 Dec 2021 11:36:18 -0800 Subject: net/mlx5: Separate FDB namespace This patch doesn't add an additional namespaces, but just separates the naming to be used by each FDB user, bypass and kernel. Downstream patches will actually split this up and allow to have more than single priority for the bypass users. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Acked-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index cd2d4c572367..b1aad14689e3 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -73,6 +73,7 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, + MLX5_FLOW_NAMESPACE_FDB_BYPASS, MLX5_FLOW_NAMESPACE_FDB, MLX5_FLOW_NAMESPACE_ESW_EGRESS, MLX5_FLOW_NAMESPACE_ESW_INGRESS, -- cgit v1.2.3 From 057ccd9db760fc2c336b1138dab345bc2d3cdd35 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 8 Dec 2021 21:17:53 +0800 Subject: dt-bindings: power: imx8ulp: add power domain header file Add i.MX8ULP power domain header file Acked-by: Rob Herring Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/dt-bindings/power/imx8ulp-power.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/dt-bindings/power/imx8ulp-power.h (limited to 'include') diff --git a/include/dt-bindings/power/imx8ulp-power.h b/include/dt-bindings/power/imx8ulp-power.h new file mode 100644 index 000000000000..a556b2e96df1 --- /dev/null +++ b/include/dt-bindings/power/imx8ulp-power.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Copyright 2021 NXP + */ + +#ifndef __DT_BINDINGS_IMX8ULP_POWER_H__ +#define __DT_BINDINGS_IMX8ULP_POWER_H__ + +#define IMX8ULP_PD_DMA1 0 +#define IMX8ULP_PD_FLEXSPI2 1 +#define IMX8ULP_PD_USB0 2 +#define IMX8ULP_PD_USDHC0 3 +#define IMX8ULP_PD_USDHC1 4 +#define IMX8ULP_PD_USDHC2_USB1 5 +#define IMX8ULP_PD_DCNANO 6 +#define IMX8ULP_PD_EPDC 7 +#define IMX8ULP_PD_DMA2 8 +#define IMX8ULP_PD_GPU2D 9 +#define IMX8ULP_PD_GPU3D 10 +#define IMX8ULP_PD_HIFI4 11 +#define IMX8ULP_PD_ISI 12 +#define IMX8ULP_PD_MIPI_CSI 13 +#define IMX8ULP_PD_MIPI_DSI 14 +#define IMX8ULP_PD_PXP 15 + +#endif -- cgit v1.2.3 From 8aa45b544db9788b0fcc7a300eba8a3ade5d3d50 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 10 Dec 2021 13:11:34 +0200 Subject: HID: Add map_msc() to avoid boilerplate code Since we are going to have more MSC events too, add map_msc() that can be used to fill in necessary fields and avoid boilerplate code. Signed-off-by: Mika Westerberg Reviewed-by: Benjamin Tissoires Signed-off-by: Tero Kristo Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211210111138.1248187-2-tero.kristo@linux.intel.com --- include/linux/hid.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index b2fea7fc54a1..f18e2cf5d74d 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1010,6 +1010,10 @@ static inline void hid_map_usage(struct hid_input *hidinput, bmap = input->ledbit; limit = LED_MAX; break; + case EV_MSC: + bmap = input->mscbit; + limit = MSC_MAX; + break; } if (unlikely(c > limit || !bmap)) { -- cgit v1.2.3 From ae7fafa6896ae762ff489a5e71c8e5fabfeb61ee Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 10 Dec 2021 13:11:36 +0200 Subject: HID: Add hid usages for USI style pens Add usage codes for USI style pens, based on the USB-HID usage table: https://usb.org/document-library/hid-usage-tables-122 See chapter 16, Digitizers Page (0x0D) Signed-off-by: Tero Kristo Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211210111138.1248187-4-tero.kristo@linux.intel.com --- include/linux/hid.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index f18e2cf5d74d..fa2ddda84a53 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -241,6 +241,7 @@ struct hid_item { #define HID_DG_TOUCH 0x000d0033 #define HID_DG_UNTOUCH 0x000d0034 #define HID_DG_TAP 0x000d0035 +#define HID_DG_TRANSDUCER_INDEX 0x000d0038 #define HID_DG_TABLETFUNCTIONKEY 0x000d0039 #define HID_DG_PROGRAMCHANGEKEY 0x000d003a #define HID_DG_BATTERYSTRENGTH 0x000d003b @@ -253,6 +254,15 @@ struct hid_item { #define HID_DG_BARRELSWITCH 0x000d0044 #define HID_DG_ERASER 0x000d0045 #define HID_DG_TABLETPICK 0x000d0046 +#define HID_DG_PEN_COLOR 0x000d005c +#define HID_DG_PEN_LINE_WIDTH 0x000d005e +#define HID_DG_PEN_LINE_STYLE 0x000d0070 +#define HID_DG_PEN_LINE_STYLE_INK 0x000d0072 +#define HID_DG_PEN_LINE_STYLE_PENCIL 0x000d0073 +#define HID_DG_PEN_LINE_STYLE_HIGHLIGHTER 0x000d0074 +#define HID_DG_PEN_LINE_STYLE_CHISEL_MARKER 0x000d0075 +#define HID_DG_PEN_LINE_STYLE_BRUSH 0x000d0076 +#define HID_DG_PEN_LINE_STYLE_NO_PREFERENCE 0x000d0077 #define HID_CP_CONSUMERCONTROL 0x000c0001 #define HID_CP_NUMERICKEYPAD 0x000c0002 -- cgit v1.2.3 From 5904a3f9d756f108279f8c5b8f17ca6ddfb28d24 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 10 Dec 2021 13:11:37 +0200 Subject: HID: input: Make hidinput_find_field() static This function is not called outside of hid-input.c so we can make it static. Signed-off-by: Mika Westerberg Reviewed-by: Benjamin Tissoires Signed-off-by: Tero Kristo Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211210111138.1248187-5-tero.kristo@linux.intel.com --- include/linux/hid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index fa2ddda84a53..ff8b6973022a 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -899,7 +899,6 @@ extern void hidinput_disconnect(struct hid_device *); int hid_set_field(struct hid_field *, unsigned, __s32); int hid_input_report(struct hid_device *, int type, u8 *, u32, int); -int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code); -- cgit v1.2.3 From 98046f7486db723ec8bb99a950a4fa5f5be55cd1 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Thu, 25 Nov 2021 15:05:26 +0800 Subject: fuse: support per inode DAX in fuse protocol Expand the fuse protocol to support per inode DAX. FUSE_HAS_INODE_DAX flag is added indicating if fuse server/client supporting per inode DAX. It can be conveyed in both FUSE_INIT request and reply. FUSE_ATTR_DAX flag is added indicating if DAX shall be enabled for corresponding file. It is conveyed in FUSE_LOOKUP reply. Signed-off-by: Jeffle Xu Reviewed-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 3f0ea63fec08..d6ccee961891 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -193,6 +193,7 @@ * - add flags2 to fuse_init_in and fuse_init_out * - add FUSE_SECURITY_CTX init flag * - add security context to create, mkdir, symlink, and mknod requests + * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX */ #ifndef _LINUX_FUSE_H @@ -351,6 +352,7 @@ struct fuse_file_lock { * FUSE_INIT_RESERVED: reserved, do not use * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and * mknod + * FUSE_HAS_INODE_DAX: use per inode DAX */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -386,6 +388,7 @@ struct fuse_file_lock { #define FUSE_INIT_RESERVED (1 << 31) /* bits 32..63 get shifted down 32 bits into the flags2 field */ #define FUSE_SECURITY_CTX (1ULL << 32) +#define FUSE_HAS_INODE_DAX (1ULL << 33) /** * CUSE INIT request/reply flags @@ -468,8 +471,10 @@ struct fuse_file_lock { * fuse_attr flags * * FUSE_ATTR_SUBMOUNT: Object is a submount root + * FUSE_ATTR_DAX: Enable DAX for this file in per inode DAX mode */ #define FUSE_ATTR_SUBMOUNT (1 << 0) +#define FUSE_ATTR_DAX (1 << 1) /** * Open flags -- cgit v1.2.3 From fd8d135b2c5e88662f2729e034913f183455a667 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 8 Dec 2021 22:40:43 +1000 Subject: HID: quirks: Allow inverting the absolute X/Y values Add a HID_QUIRK_X_INVERT/HID_QUIRK_Y_INVERT quirk that can be used to invert the X/Y values. Signed-off-by: Alistair Francis [bentiss: silence checkpatch warning] Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211208124045.61815-2-alistair@alistair23.me --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index ff8b6973022a..a14c089eb0c1 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -359,6 +359,8 @@ struct hid_item { /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ #define HID_QUIRK_ALWAYS_POLL BIT(10) #define HID_QUIRK_INPUT_PER_APP BIT(11) +#define HID_QUIRK_X_INVERT BIT(12) +#define HID_QUIRK_Y_INVERT BIT(13) #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) -- cgit v1.2.3 From 43d5ac7d07023cd133b978de473b3400edad941f Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 23 Nov 2021 11:24:04 +0000 Subject: drm: document DRM_IOCTL_MODE_GETFB2 There are a few details specific to the GETFB2 IOCTL. It's not immediately clear how user-space should check for the number of planes. Suggest using the handles field or the pitches field. The modifier array is filled with zeroes, ie. DRM_FORMAT_MOD_LINEAR. So explicitly tell user-space to not look at it unless the flag is set. Changes in v2 (Daniel): - Mention that handles should be used to compute the number of planes, and only refer to pitches as a fallback. - Reword bit about undefined modifier. Signed-off-by: Simon Ser Acked-by: Daniel Vetter Acked-by: Pekka Paalanen Acked-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20211123112400.22245-1-contact@emersion.fr --- include/uapi/drm/drm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 3b810b53ba8b..642808520d92 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -1096,6 +1096,24 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) +/** + * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. + * + * This queries metadata about a framebuffer. User-space fills + * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the + * struct as the output. + * + * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles + * will be filled with GEM buffer handles. Planes are valid until one has a + * zero handle -- this can be used to compute the number of planes. + * + * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid + * until one has a zero &drm_mode_fb_cmd2.pitches. + * + * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set + * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the + * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) /* -- cgit v1.2.3 From 369461ce8fb6c8156206c7110d7da48e9fbc41bb Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 8 Dec 2021 14:11:20 -0600 Subject: x86: perf: Move RDPMC event flag to a common definition In preparation to enable user counter access on arm64 and to move some of the user access handling to perf core, create a common event flag for user counter access and convert x86 to use it. Since the architecture specific flags start at the LSB, starting at the MSB for common flags. Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Kan Liang Cc: Thomas Gleixner Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: linux-perf-users@vger.kernel.org Reviewed-by: Mark Rutland Reviewed-by: Thomas Gleixner Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20211208201124.310740-2-robh@kernel.org Signed-off-by: Will Deacon --- include/linux/perf_event.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0dcfd265beed..ba9467972c09 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -129,6 +129,15 @@ struct hw_perf_event_extra { int idx; /* index in shared_regs->regs[] */ }; +/** + * hw_perf_event::flag values + * + * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific + * usage. + */ +#define PERF_EVENT_FLAG_ARCH 0x0000ffff +#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 + /** * struct hw_perf_event - performance event hardware details: */ -- cgit v1.2.3 From 82ff0c022d19c2ad69a472692bb7ee01ac07a40b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 8 Dec 2021 14:11:21 -0600 Subject: perf: Add a counter for number of user access events in context On arm64, user space counter access will be controlled differently compared to x86. On x86, access in the strictest mode is enabled for all tasks in an MM when any event is mmap'ed. For arm64, access is explicitly requested for an event and only enabled when the event's context is active. This avoids hooks into the arch context switch code and gives better control of when access is enabled. In order to configure user space access when the PMU is enabled, it is necessary to know if any event (currently active or not) in the current context has user space accessed enabled. Add a counter similar to other counters in the context to avoid walking the event list every time. Reviewed-by: Mark Rutland Reviewed-by: Thomas Gleixner Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20211208201124.310740-3-robh@kernel.org Signed-off-by: Will Deacon --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ba9467972c09..411e34210fbf 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -831,6 +831,7 @@ struct perf_event_context { int nr_events; int nr_active; + int nr_user; int is_active; int nr_stat; int nr_freq; -- cgit v1.2.3 From 9c9211a3fc7aa41b2952765b62000443b3bb6f23 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 10 Dec 2021 16:59:58 +0800 Subject: net_tstamp: add new flag HWTSTAMP_FLAG_BONDED_PHC_INDEX Since commit 94dd016ae538 ("bond: pass get_ts_info and SIOC[SG]HWTSTAMP ioctl to active device") the user could get bond active interface's PHC index directly. But when there is a failover, the bond active interface will change, thus the PHC index is also changed. This may break the user's program if they did not update the PHC timely. This patch adds a new hwtstamp_config flag HWTSTAMP_FLAG_BONDED_PHC_INDEX. When the user wants to get the bond active interface's PHC, they need to add this flag and be aware the PHC index may be changed. With the new flag. All flag checks in current drivers are removed. Only the checking in net_hwtstamp_validate() is kept. Suggested-by: Jakub Kicinski Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/uapi/linux/net_tstamp.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index fcc61c73a666..e258e52cfd1f 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -62,7 +62,7 @@ struct so_timestamping { /** * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter * - * @flags: no flags defined right now, must be zero for %SIOCSHWTSTAMP + * @flags: one of HWTSTAMP_FLAG_* * @tx_type: one of HWTSTAMP_TX_* * @rx_filter: one of HWTSTAMP_FILTER_* * @@ -78,6 +78,20 @@ struct hwtstamp_config { int rx_filter; }; +/* possible values for hwtstamp_config->flags */ +enum hwtstamp_flags { + /* + * With this flag, the user could get bond active interface's + * PHC index. Note this PHC index is not stable as when there + * is a failover, the bond active interface will be changed, so + * will be the PHC index. + */ + HWTSTAMP_FLAG_BONDED_PHC_INDEX = (1<<0), + + HWTSTAMP_FLAG_LAST = HWTSTAMP_FLAG_BONDED_PHC_INDEX, + HWTSTAMP_FLAG_MASK = (HWTSTAMP_FLAG_LAST - 1) | HWTSTAMP_FLAG_LAST +}; + /* possible values for hwtstamp_config->tx_type */ enum hwtstamp_tx_types { /* -- cgit v1.2.3 From 8404b0fbc7fbd42e5c5d28cdedd450e70829c77a Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Thu, 2 Dec 2021 16:06:33 +0800 Subject: drivers/perf: hisi: Add driver for HiSilicon PCIe PMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCIe PMU Root Complex Integrated End Point(RCiEP) device is supported to sample bandwidth, latency, buffer occupation etc. Each PMU RCiEP device monitors multiple Root Ports, and each RCiEP is registered as a PMU in /sys/bus/event_source/devices, so users can select target PMU, and use filter to do further sets. Filtering options contains: event - select the event. port - select target Root Ports. Information of Root Ports are shown under sysfs. bdf - select requester_id of target EP device. trig_len - set trigger condition for starting event statistics. trig_mode - set trigger mode. 0 means starting to statistic when bigger than trigger condition, and 1 means smaller. thr_len - set threshold for statistics. thr_mode - set threshold mode. 0 means count when bigger than threshold, and 1 means smaller. Acked-by: Krzysztof Wilczyński Reviewed-by: John Garry Signed-off-by: Qi Liu Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/20211202080633.2919-3-liuqi115@huawei.com Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 773c83730906..411a428ace4d 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -225,6 +225,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, -- cgit v1.2.3 From c8a2a011cd04054a6577d8cf774adf9e09302876 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 14 Dec 2021 03:45:35 +0200 Subject: net: dsa: sja1105: fix broken connection with the sja1110 tagger The driver was incorrectly converted assuming that "sja1105" is the only tagger supported by this driver. This results in SJA1110 switches failing to probe: sja1105 spi1.0: Unable to connect to tag protocol "sja1110": -EPROTONOSUPPORT sja1105: probe of spi1.2 failed with error -93 Add DSA_TAG_PROTO_SJA1110 to the list of supported taggers by the sja1105 driver. The sja1105_tagger_data structure format is common for the two tagging protocols. Fixes: c79e84866d2a ("net: dsa: tag_sja1105: convert to tagger-owned data") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index e9cb1ae6d742..159e43171ccc 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -70,7 +70,8 @@ struct sja1105_skb_cb { static inline struct sja1105_tagger_data * sja1105_tagger_data(struct dsa_switch *ds) { - BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1105); + BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1105 && + ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1110); return ds->tagger_data; } -- cgit v1.2.3 From 7f2973149c22e7a6fee4c0c9fa6b8e4108e9c208 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 14 Dec 2021 03:45:36 +0200 Subject: net: dsa: make tagging protocols connect to individual switches from a tree On the NXP Bluebox 3 board which uses a multi-switch setup with sja1105, the mechanism through which the tagger connects to the switch tree is broken, due to improper DSA code design. At the time when tag_ops->connect() is called in dsa_port_parse_cpu(), DSA hasn't finished "touching" all the ports, so it doesn't know how large the tree is and how many ports it has. It has just seen the first CPU port by this time. As a result, this function will call the tagger's ->connect method too early, and the tagger will connect only to the first switch from the tree. This could be perhaps addressed a bit more simply by just moving the tag_ops->connect(dst) call a bit later (for example in dsa_tree_setup), but there is already a design inconsistency at present: on the switch side, the notification is on a per-switch basis, but on the tagger side, it is on a per-tree basis. Furthermore, the persistent storage itself is per switch (ds->tagger_data). And the tagger connect and disconnect procedures (at least the ones that exist currently) could see a fair bit of simplification if they didn't have to iterate through the switches of a tree. To fix the issue, this change transforms tag_ops->connect(dst) into tag_ops->connect(ds) and moves it somewhere where we already iterate over all switches of a tree. That is in dsa_switch_setup_tag_protocol(), which is a good placement because we already have there the connection call to the switch side of things. As for the dsa_tree_bind_tag_proto() method (called from the code path that changes the tag protocol), things are a bit more complicated because we receive the tree as argument, yet when we unwind on errors, it would be nice to not call tag_ops->disconnect(ds) where we didn't previously call tag_ops->connect(ds). We didn't have this problem before because the tag_ops connection operations passed the entire dst before, and this is more fine grained now. To solve the error rewind case using the new API, we have to create yet one more cross-chip notifier for disconnection, and stay connected with the old tag protocol to all the switches in the tree until we've succeeded to connect with the new one as well. So if something fails half way, the whole tree is still connected to the old tagger. But there may still be leaks if the tagger fails to connect to the 2nd out of 3 switches in a tree: somebody needs to tell the tagger to disconnect from the first switch. Nothing comes for free, and this was previously handled privately by the tagging protocol driver before, but now we need to emit a disconnect cross-chip notifier for that, because DSA has to take care of the unwind path. We assume that the tagging protocol has connected to a switch if it has set ds->tagger_data to something, otherwise we avoid calling its disconnection method in the error rewind path. The rest of the changes are in the tagging protocol drivers, and have to do with the replacement of dst with ds. The iteration is removed and the error unwind path is simplified, as mentioned above. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 64d71968aa91..f16959444ae1 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -82,15 +82,14 @@ enum dsa_tag_protocol { }; struct dsa_switch; -struct dsa_switch_tree; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); - int (*connect)(struct dsa_switch_tree *dst); - void (*disconnect)(struct dsa_switch_tree *dst); + int (*connect)(struct dsa_switch *ds); + void (*disconnect)(struct dsa_switch *ds); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; -- cgit v1.2.3 From 74747dda582dc7ddd8aac6841954e84b0d28e56c Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 27 Nov 2021 12:46:58 +0100 Subject: media: lirc: always send timeout reports Without timeout reports, it is impossible to decode many protocols since it is not known when the transmission ends. timeout reports are sent by default, but can be turned off. There is no reason to turn them off, and I cannot find any software which does this, so we can safely remove it. This makes the ioctl LIRC_SET_REC_TIMEOUT_REPORTS a no-op. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/media/rc-core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/media/rc-core.h b/include/media/rc-core.h index 8c5b7978e1d9..ab9d3b7cd799 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -59,7 +59,6 @@ enum rc_filter_type { * @rc: rcdev for this lirc chardev * @carrier_low: when setting the carrier range, first the low end must be * set with an ioctl and then the high end with another ioctl - * @send_timeout_reports: report timeouts in lirc raw IR. * @rawir: queue for incoming raw IR * @scancodes: queue for incoming decoded scancodes * @wait_poll: poll struct for lirc device @@ -72,7 +71,6 @@ struct lirc_fh { struct list_head list; struct rc_dev *rc; int carrier_low; - bool send_timeout_reports; DECLARE_KFIFO_PTR(rawir, unsigned int); DECLARE_KFIFO_PTR(scancodes, struct lirc_scancode); wait_queue_head_t wait_poll; -- cgit v1.2.3 From f6f787874aa52bbfbfd0210f519439d38fd5377f Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 2 Dec 2021 16:02:31 +0800 Subject: dt-bindings: phy: phy-imx8-pcie: Add binding for the pad modes of imx8 pcie phy Add binding for reference clock PAD modes of the i.MX8 PCIe PHY. Signed-off-by: Richard Zhu Tested-by: Marcel Ziswiler Reviewed-by: Tim Harvey Tested-by: Tim Harvey Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/1638432158-4119-2-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- include/dt-bindings/phy/phy-imx8-pcie.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/dt-bindings/phy/phy-imx8-pcie.h (limited to 'include') diff --git a/include/dt-bindings/phy/phy-imx8-pcie.h b/include/dt-bindings/phy/phy-imx8-pcie.h new file mode 100644 index 000000000000..8bbe2d6538d8 --- /dev/null +++ b/include/dt-bindings/phy/phy-imx8-pcie.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */ +/* + * This header provides constants for i.MX8 PCIe. + */ + +#ifndef _DT_BINDINGS_IMX8_PCIE_H +#define _DT_BINDINGS_IMX8_PCIE_H + +/* Reference clock PAD mode */ +#define IMX8_PCIE_REFCLK_PAD_UNUSED 0 +#define IMX8_PCIE_REFCLK_PAD_INPUT 1 +#define IMX8_PCIE_REFCLK_PAD_OUTPUT 2 + +#endif /* _DT_BINDINGS_IMX8_PCIE_H */ -- cgit v1.2.3 From 391137c04ec3ab3d27aa4e3081427f490655ec6f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Dec 2021 13:02:01 +0100 Subject: media: dmxdev: drop unneeded inclusion from other headers There is no evidence we need kernel.h inclusion in certain headers. Drop unneeded inclusion from other headers. Link: https://lore.kernel.org/linux-media/20211210120201.35635-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Mauro Carvalho Chehab --- include/media/dmxdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/media/dmxdev.h b/include/media/dmxdev.h index baafa3b8aca4..63219a699370 100644 --- a/include/media/dmxdev.h +++ b/include/media/dmxdev.h @@ -21,7 +21,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From b7898396f4bbe160f546d0c5e9fa17cca9a7d153 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 7 Dec 2021 11:37:42 -0600 Subject: ASoC: soc-pcm: Fix and cleanup DPCM locking The existing locking for DPCM has several issues a) a confusing mix of card->mutex and card->pcm_mutex. b) a dpcm_lock spinlock added inconsistently and on paths that could be recursively taken. The use of irqsave/irqrestore was also overkill. The suggested model is: 1) The pcm_mutex is the top-most protection of BE links in the FE. The pcm_mutex is applied always on either the top PCM callbacks or the external call from DAPM, not taken in the internal functions. 2) the FE stream lock is taken in higher levels before invoking dpcm_be_dai_trigger() 3) when adding and deleting a BE, both the pcm_mutex and FE stream lock are taken. Signed-off-by: Takashi Iwai [clarification of commit message by plbossart] Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20211207173745.15850-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 8e6dd8a257c5..5872a8864f3b 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -893,8 +893,6 @@ struct snd_soc_card { struct mutex pcm_mutex; enum snd_soc_pcm_subclass pcm_subclass; - spinlock_t dpcm_lock; - int (*probe)(struct snd_soc_card *card); int (*late_probe)(struct snd_soc_card *card); int (*remove)(struct snd_soc_card *card); -- cgit v1.2.3 From 848aedfdc6ba25ad5652797db9266007773e44dd Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 7 Dec 2021 11:37:44 -0600 Subject: ASoC: soc-pcm: test refcount before triggering On start/pause_release/resume, when more than one FE is connected to the same BE, it's possible that the trigger is sent more than once. This is not desirable, we only want to trigger a BE once, which is straightforward to implement with a refcount. For stop/pause/suspend, the problem is more complicated: the check implemented in snd_soc_dpcm_can_be_free_stop() may fail due to a conceptual deadlock when we trigger the BE before the FE. In this case, the FE states have not yet changed, so there are corner cases where the TRIGGER_STOP is never sent - the dual case of start where multiple triggers might be sent. This patch suggests an unconditional trigger in all cases, without checking the FE states, using a refcount protected by the BE PCM stream lock. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20211207173745.15850-6-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-dpcm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index bc7af90099a8..75b92d883976 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -101,6 +101,8 @@ struct snd_soc_dpcm_runtime { enum snd_soc_dpcm_state state; int trigger_pending; /* trigger cmd + 1 if pending, 0 if not */ + + int be_start; /* refcount protected by BE stream pcm lock */ }; #define for_each_dpcm_fe(be, stream, _dpcm) \ -- cgit v1.2.3 From 326db0dc00e57432b689349b4da3e86c90d5d61a Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Tue, 14 Dec 2021 00:30:30 +0100 Subject: amdgpu: fix some comment typos Signed-off-by: Yann Dirson Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 26e45fc5eb1a..0b94ec7b73e7 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -80,7 +80,7 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the * GPU's virtual address space via gart. Gart memory linearizes non-contiguous - * pages of system memory, allows GPU access system memory in a linezrized + * pages of system memory, allows GPU access system memory in a linearized * fashion. * * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory -- cgit v1.2.3 From 0ae8c6252888d487f69b406369c3176172bb2064 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 9 Dec 2021 14:18:41 +0530 Subject: dt-bindings: interconnect: Add Qualcomm SM8450 DT bindings The Qualcomm SM8450 SoC has several bus fabrics that could be controlled and tuned dynamically according to the bandwidth demand Signed-off-by: Vinod Koul Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211209084842.189627-2-vkoul@kernel.org Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,sm8450.h | 171 +++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,sm8450.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,sm8450.h b/include/dt-bindings/interconnect/qcom,sm8450.h new file mode 100644 index 000000000000..8f3c5e1fb4c4 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,sm8450.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2021, Linaro Limited + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SM8450_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_SM8450_H + +#define MASTER_QSPI_0 0 +#define MASTER_QUP_1 1 +#define MASTER_A1NOC_CFG 2 +#define MASTER_SDCC_4 3 +#define MASTER_UFS_MEM 4 +#define MASTER_USB3_0 5 +#define SLAVE_A1NOC_SNOC 6 +#define SLAVE_SERVICE_A1NOC 7 + +#define MASTER_QDSS_BAM 0 +#define MASTER_QUP_0 1 +#define MASTER_QUP_2 2 +#define MASTER_A2NOC_CFG 3 +#define MASTER_CRYPTO 4 +#define MASTER_IPA 5 +#define MASTER_SENSORS_PROC 6 +#define MASTER_SP 7 +#define MASTER_QDSS_ETR 8 +#define MASTER_QDSS_ETR_1 9 +#define MASTER_SDCC_2 10 +#define SLAVE_A2NOC_SNOC 11 +#define SLAVE_SERVICE_A2NOC 12 + +#define MASTER_QUP_CORE_0 0 +#define MASTER_QUP_CORE_1 1 +#define MASTER_QUP_CORE_2 2 +#define SLAVE_QUP_CORE_0 3 +#define SLAVE_QUP_CORE_1 4 +#define SLAVE_QUP_CORE_2 5 + +#define MASTER_GEM_NOC_CNOC 0 +#define MASTER_GEM_NOC_PCIE_SNOC 1 +#define SLAVE_AHB2PHY_SOUTH 2 +#define SLAVE_AHB2PHY_NORTH 3 +#define SLAVE_AOSS 4 +#define SLAVE_CAMERA_CFG 5 +#define SLAVE_CLK_CTL 6 +#define SLAVE_CDSP_CFG 7 +#define SLAVE_RBCPR_CX_CFG 8 +#define SLAVE_RBCPR_MMCX_CFG 9 +#define SLAVE_RBCPR_MXA_CFG 10 +#define SLAVE_RBCPR_MXC_CFG 11 +#define SLAVE_CRYPTO_0_CFG 12 +#define SLAVE_CX_RDPM 13 +#define SLAVE_DISPLAY_CFG 14 +#define SLAVE_GFX3D_CFG 15 +#define SLAVE_IMEM_CFG 16 +#define SLAVE_IPA_CFG 17 +#define SLAVE_IPC_ROUTER_CFG 18 +#define SLAVE_LPASS 19 +#define SLAVE_CNOC_MSS 20 +#define SLAVE_MX_RDPM 21 +#define SLAVE_PCIE_0_CFG 22 +#define SLAVE_PCIE_1_CFG 23 +#define SLAVE_PDM 24 +#define SLAVE_PIMEM_CFG 25 +#define SLAVE_PRNG 26 +#define SLAVE_QDSS_CFG 27 +#define SLAVE_QSPI_0 28 +#define SLAVE_QUP_0 29 +#define SLAVE_QUP_1 30 +#define SLAVE_QUP_2 31 +#define SLAVE_SDCC_2 32 +#define SLAVE_SDCC_4 33 +#define SLAVE_SPSS_CFG 34 +#define SLAVE_TCSR 35 +#define SLAVE_TLMM 36 +#define SLAVE_TME_CFG 37 +#define SLAVE_UFS_MEM_CFG 38 +#define SLAVE_USB3_0 39 +#define SLAVE_VENUS_CFG 40 +#define SLAVE_VSENSE_CTRL_CFG 41 +#define SLAVE_A1NOC_CFG 42 +#define SLAVE_A2NOC_CFG 43 +#define SLAVE_DDRSS_CFG 44 +#define SLAVE_CNOC_MNOC_CFG 45 +#define SLAVE_PCIE_ANOC_CFG 46 +#define SLAVE_SNOC_CFG 47 +#define SLAVE_IMEM 48 +#define SLAVE_PIMEM 49 +#define SLAVE_SERVICE_CNOC 50 +#define SLAVE_PCIE_0 51 +#define SLAVE_PCIE_1 52 +#define SLAVE_QDSS_STM 53 +#define SLAVE_TCU 54 + +#define MASTER_GPU_TCU 0 +#define MASTER_SYS_TCU 1 +#define MASTER_APPSS_PROC 2 +#define MASTER_GFX3D 3 +#define MASTER_MSS_PROC 4 +#define MASTER_MNOC_HF_MEM_NOC 5 +#define MASTER_MNOC_SF_MEM_NOC 6 +#define MASTER_COMPUTE_NOC 7 +#define MASTER_ANOC_PCIE_GEM_NOC 8 +#define MASTER_SNOC_GC_MEM_NOC 9 +#define MASTER_SNOC_SF_MEM_NOC 10 +#define SLAVE_GEM_NOC_CNOC 11 +#define SLAVE_LLCC 12 +#define SLAVE_MEM_NOC_PCIE_SNOC 13 +#define MASTER_MNOC_HF_MEM_NOC_DISP 14 +#define MASTER_MNOC_SF_MEM_NOC_DISP 15 +#define MASTER_ANOC_PCIE_GEM_NOC_DISP 16 +#define SLAVE_LLCC_DISP 17 + +#define MASTER_CNOC_LPASS_AG_NOC 0 +#define MASTER_LPASS_PROC 1 +#define SLAVE_LPASS_CORE_CFG 2 +#define SLAVE_LPASS_LPI_CFG 3 +#define SLAVE_LPASS_MPU_CFG 4 +#define SLAVE_LPASS_TOP_CFG 5 +#define SLAVE_LPASS_SNOC 6 +#define SLAVE_SERVICES_LPASS_AML_NOC 7 +#define SLAVE_SERVICE_LPASS_AG_NOC 8 + +#define MASTER_LLCC 0 +#define SLAVE_EBI1 1 +#define MASTER_LLCC_DISP 2 +#define SLAVE_EBI1_DISP 3 + +#define MASTER_CAMNOC_HF 0 +#define MASTER_CAMNOC_ICP 1 +#define MASTER_CAMNOC_SF 2 +#define MASTER_MDP 3 +#define MASTER_CNOC_MNOC_CFG 4 +#define MASTER_ROTATOR 5 +#define MASTER_CDSP_HCP 6 +#define MASTER_VIDEO 7 +#define MASTER_VIDEO_CV_PROC 8 +#define MASTER_VIDEO_PROC 9 +#define MASTER_VIDEO_V_PROC 10 +#define SLAVE_MNOC_HF_MEM_NOC 11 +#define SLAVE_MNOC_SF_MEM_NOC 12 +#define SLAVE_SERVICE_MNOC 13 +#define MASTER_MDP_DISP 14 +#define MASTER_ROTATOR_DISP 15 +#define SLAVE_MNOC_HF_MEM_NOC_DISP 16 +#define SLAVE_MNOC_SF_MEM_NOC_DISP 17 + +#define MASTER_CDSP_NOC_CFG 0 +#define MASTER_CDSP_PROC 1 +#define SLAVE_CDSP_MEM_NOC 2 +#define SLAVE_SERVICE_NSP_NOC 3 + +#define MASTER_PCIE_ANOC_CFG 0 +#define MASTER_PCIE_0 1 +#define MASTER_PCIE_1 2 +#define SLAVE_ANOC_PCIE_GEM_NOC 3 +#define SLAVE_SERVICE_PCIE_ANOC 4 + +#define MASTER_GIC_AHB 0 +#define MASTER_A1NOC_SNOC 1 +#define MASTER_A2NOC_SNOC 2 +#define MASTER_LPASS_ANOC 3 +#define MASTER_SNOC_CFG 4 +#define MASTER_PIMEM 5 +#define MASTER_GIC 6 +#define SLAVE_SNOC_GEM_NOC_GC 7 +#define SLAVE_SNOC_GEM_NOC_SF 8 +#define SLAVE_SERVICE_SNOC 9 + +#endif -- cgit v1.2.3 From 0045e0d3f42ed7d05434bb5bc16acfc793ea4891 Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Tue, 7 Dec 2021 20:49:01 +0800 Subject: RDMA/hns: Support direct wqe of userspace The current write wqe mechanism is to write to DDR first, and then notify the hardware through doorbell to read the data. Direct wqe is a mechanism to fill wqe directly into the hardware. In the case of light load, the wqe will be filled into pcie bar space of the hardware, this will reduce one memory access operation and therefore reduce the latency. SIMD instructions allows cpu to write the 512 bits at one time to device memory, thus it can be used for posting direct wqe. Add direct wqe enable switch and address mapping. Link: https://lore.kernel.org/r/20211207124901.42123-2-liangwenpeng@huawei.com Signed-off-by: Yixing Liu Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/hns-abi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 42b177655560..f6fde06db4b4 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -77,10 +77,12 @@ enum hns_roce_qp_cap_flags { HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, + HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, }; struct hns_roce_ib_create_qp_resp { __aligned_u64 cap_flags; + __aligned_u64 dwqe_mmap_key; }; struct hns_roce_ib_alloc_ucontext_resp { -- cgit v1.2.3 From 9280ac2e6f199cddcd746a9ba459136b8666287b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Dec 2021 07:15:15 -0800 Subject: net: dev_replace_track() cleanup Use existing helpers (netdev_tracker_free() and netdev_tracker_alloc()) to remove ifdefery. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20211214151515.312535-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 235d5d082f1a..c06e9dc1a317 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3885,16 +3885,14 @@ static inline void dev_replace_track(struct net_device *odev, netdevice_tracker *tracker, gfp_t gfp) { -#ifdef CONFIG_NET_DEV_REFCNT_TRACKER if (odev) - ref_tracker_free(&odev->refcnt_tracker, tracker); -#endif + netdev_tracker_free(odev, tracker); + dev_hold(ndev); dev_put(odev); -#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + if (ndev) - ref_tracker_alloc(&ndev->refcnt_tracker, tracker, gfp); -#endif + netdev_tracker_alloc(ndev, tracker, gfp); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on -- cgit v1.2.3 From 6813b1928758ce64fabbb8ef157f994b7c2235fa Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 14 Dec 2021 15:16:04 -0800 Subject: mptcp: add missing documented NL params 'loc_id' and 'rem_id' are set in all events linked to subflows but those were missing in the events description in the comments. Fixes: b911c97c7dc7 ("mptcp: add netlink event support") Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index c8cc46f80a16..f106a3941cdf 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -136,19 +136,21 @@ struct mptcp_info { * MPTCP_EVENT_REMOVED: token, rem_id * An address has been lost by the peer. * - * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, - * daddr4 | daddr6, sport, dport, backup, - * if_idx [, error] + * MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id, + * saddr4 | saddr6, daddr4 | daddr6, sport, + * dport, backup, if_idx [, error] * A new subflow has been established. 'error' should not be set. * - * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, backup, if_idx [, error] + * MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, if_idx + * [, error] * A subflow has been closed. An error (copy of sk_err) could be set if an * error has been detected for this subflow. * - * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, backup, if_idx [, error] - * The priority of a subflow has changed. 'error' should not be set. + * MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, if_idx + * [, error] + * The priority of a subflow has changed. 'error' should not be set. */ enum mptcp_event_type { MPTCP_EVENT_UNSPEC = 0, -- cgit v1.2.3 From 8f8ef3860d4403d1bf0887380f9e3376be092c40 Mon Sep 17 00:00:00 2001 From: Vamsi krishna Lanka Date: Mon, 6 Dec 2021 23:32:49 -0800 Subject: dt-bindings: clock: Add SDX65 GCC clock bindings Add device tree bindings for global clock controller on SDX65 SOCs. Signed-off-by: Vamsi Krishna Lanka Reviewed-by: Rob Herring Reviewed-by: Vinod Koul Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/e15509b2b7c9b600ab38c5269d4fac609c077b5b.1638861860.git.quic_vamslank@quicinc.com --- include/dt-bindings/clock/qcom,gcc-sdx65.h | 122 +++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,gcc-sdx65.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sdx65.h b/include/dt-bindings/clock/qcom,gcc-sdx65.h new file mode 100644 index 000000000000..75ecc9237d8f --- /dev/null +++ b/include/dt-bindings/clock/qcom,gcc-sdx65.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SDX65_H +#define _DT_BINDINGS_CLK_QCOM_GCC_SDX65_H + +/* GCC clocks */ +#define GPLL0 0 +#define GPLL0_OUT_EVEN 1 +#define GCC_AHB_PCIE_LINK_CLK 2 +#define GCC_BLSP1_AHB_CLK 3 +#define GCC_BLSP1_QUP1_I2C_APPS_CLK 4 +#define GCC_BLSP1_QUP1_I2C_APPS_CLK_SRC 5 +#define GCC_BLSP1_QUP1_SPI_APPS_CLK 6 +#define GCC_BLSP1_QUP1_SPI_APPS_CLK_SRC 7 +#define GCC_BLSP1_QUP2_I2C_APPS_CLK 8 +#define GCC_BLSP1_QUP2_I2C_APPS_CLK_SRC 9 +#define GCC_BLSP1_QUP2_SPI_APPS_CLK 10 +#define GCC_BLSP1_QUP2_SPI_APPS_CLK_SRC 11 +#define GCC_BLSP1_QUP3_I2C_APPS_CLK 12 +#define GCC_BLSP1_QUP3_I2C_APPS_CLK_SRC 13 +#define GCC_BLSP1_QUP3_SPI_APPS_CLK 14 +#define GCC_BLSP1_QUP3_SPI_APPS_CLK_SRC 15 +#define GCC_BLSP1_QUP4_I2C_APPS_CLK 16 +#define GCC_BLSP1_QUP4_I2C_APPS_CLK_SRC 17 +#define GCC_BLSP1_QUP4_SPI_APPS_CLK 18 +#define GCC_BLSP1_QUP4_SPI_APPS_CLK_SRC 19 +#define GCC_BLSP1_SLEEP_CLK 20 +#define GCC_BLSP1_UART1_APPS_CLK 21 +#define GCC_BLSP1_UART1_APPS_CLK_SRC 22 +#define GCC_BLSP1_UART2_APPS_CLK 23 +#define GCC_BLSP1_UART2_APPS_CLK_SRC 24 +#define GCC_BLSP1_UART3_APPS_CLK 25 +#define GCC_BLSP1_UART3_APPS_CLK_SRC 26 +#define GCC_BLSP1_UART4_APPS_CLK 27 +#define GCC_BLSP1_UART4_APPS_CLK_SRC 28 +#define GCC_BOOT_ROM_AHB_CLK 29 +#define GCC_CPUSS_AHB_CLK 30 +#define GCC_CPUSS_AHB_CLK_SRC 31 +#define GCC_CPUSS_AHB_POSTDIV_CLK_SRC 32 +#define GCC_CPUSS_GNOC_CLK 33 +#define GCC_GP1_CLK 34 +#define GCC_GP1_CLK_SRC 35 +#define GCC_GP2_CLK 36 +#define GCC_GP2_CLK_SRC 37 +#define GCC_GP3_CLK 38 +#define GCC_GP3_CLK_SRC 39 +#define GCC_PCIE_0_CLKREF_EN 40 +#define GCC_PCIE_AUX_CLK 41 +#define GCC_PCIE_AUX_CLK_SRC 42 +#define GCC_PCIE_AUX_PHY_CLK_SRC 43 +#define GCC_PCIE_CFG_AHB_CLK 44 +#define GCC_PCIE_MSTR_AXI_CLK 45 +#define GCC_PCIE_PIPE_CLK 46 +#define GCC_PCIE_PIPE_CLK_SRC 47 +#define GCC_PCIE_RCHNG_PHY_CLK 48 +#define GCC_PCIE_RCHNG_PHY_CLK_SRC 49 +#define GCC_PCIE_SLEEP_CLK 50 +#define GCC_PCIE_SLV_AXI_CLK 51 +#define GCC_PCIE_SLV_Q2A_AXI_CLK 52 +#define GCC_PDM2_CLK 53 +#define GCC_PDM2_CLK_SRC 54 +#define GCC_PDM_AHB_CLK 55 +#define GCC_PDM_XO4_CLK 56 +#define GCC_RX1_USB2_CLKREF_EN 57 +#define GCC_SDCC1_AHB_CLK 58 +#define GCC_SDCC1_APPS_CLK 59 +#define GCC_SDCC1_APPS_CLK_SRC 60 +#define GCC_SPMI_FETCHER_AHB_CLK 61 +#define GCC_SPMI_FETCHER_CLK 62 +#define GCC_SPMI_FETCHER_CLK_SRC 63 +#define GCC_SYS_NOC_CPUSS_AHB_CLK 64 +#define GCC_USB30_MASTER_CLK 65 +#define GCC_USB30_MASTER_CLK_SRC 66 +#define GCC_USB30_MOCK_UTMI_CLK 67 +#define GCC_USB30_MOCK_UTMI_CLK_SRC 68 +#define GCC_USB30_MOCK_UTMI_POSTDIV_CLK_SRC 69 +#define GCC_USB30_MSTR_AXI_CLK 70 +#define GCC_USB30_SLEEP_CLK 71 +#define GCC_USB30_SLV_AHB_CLK 72 +#define GCC_USB3_PHY_AUX_CLK 73 +#define GCC_USB3_PHY_AUX_CLK_SRC 74 +#define GCC_USB3_PHY_PIPE_CLK 75 +#define GCC_USB3_PHY_PIPE_CLK_SRC 76 +#define GCC_USB3_PRIM_CLKREF_EN 77 +#define GCC_USB_PHY_CFG_AHB2PHY_CLK 78 +#define GCC_XO_DIV4_CLK 79 +#define GCC_XO_PCIE_LINK_CLK 80 + +/* GCC resets */ +#define GCC_BLSP1_QUP1_BCR 0 +#define GCC_BLSP1_QUP2_BCR 1 +#define GCC_BLSP1_QUP3_BCR 2 +#define GCC_BLSP1_QUP4_BCR 3 +#define GCC_BLSP1_UART1_BCR 4 +#define GCC_BLSP1_UART2_BCR 5 +#define GCC_BLSP1_UART3_BCR 6 +#define GCC_BLSP1_UART4_BCR 7 +#define GCC_PCIE_BCR 8 +#define GCC_PCIE_LINK_DOWN_BCR 9 +#define GCC_PCIE_NOCSR_COM_PHY_BCR 10 +#define GCC_PCIE_PHY_BCR 11 +#define GCC_PCIE_PHY_CFG_AHB_BCR 12 +#define GCC_PCIE_PHY_COM_BCR 13 +#define GCC_PCIE_PHY_NOCSR_COM_PHY_BCR 14 +#define GCC_PDM_BCR 15 +#define GCC_QUSB2PHY_BCR 16 +#define GCC_SDCC1_BCR 17 +#define GCC_SPMI_FETCHER_BCR 18 +#define GCC_TCSR_PCIE_BCR 19 +#define GCC_USB30_BCR 20 +#define GCC_USB3_PHY_BCR 21 +#define GCC_USB3PHY_PHY_BCR 22 +#define GCC_USB_PHY_CFG_AHB2PHY_BCR 23 + +/* GCC power domains */ +#define USB30_GDSC 0 +#define PCIE_GDSC 1 + +#endif -- cgit v1.2.3 From 72a0ca203ca7fae34fe61668906fe483b97d9039 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 7 Dec 2021 17:10:02 +0530 Subject: dt-bindings: clock: Add SM8450 GCC clock bindings Add device tree bindings for global clock controller on SM8450 SoCs. Signed-off-by: Vinod Koul Reviewed-by: Stephen Boyd Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211207114003.100693-2-vkoul@kernel.org --- include/dt-bindings/clock/qcom,gcc-sm8450.h | 244 ++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,gcc-sm8450.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sm8450.h b/include/dt-bindings/clock/qcom,gcc-sm8450.h new file mode 100644 index 000000000000..cf1469312c4c --- /dev/null +++ b/include/dt-bindings/clock/qcom,gcc-sm8450.h @@ -0,0 +1,244 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2021, Linaro Limited + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SM8450_H +#define _DT_BINDINGS_CLK_QCOM_GCC_SM8450_H + +/* GCC HW clocks */ +#define CORE_BI_PLL_TEST_SE 0 +#define PCIE_0_PIPE_CLK 1 +#define PCIE_1_PHY_AUX_CLK 2 +#define PCIE_1_PIPE_CLK 3 +#define UFS_PHY_RX_SYMBOL_0_CLK 4 +#define UFS_PHY_RX_SYMBOL_1_CLK 5 +#define UFS_PHY_TX_SYMBOL_0_CLK 6 +#define USB3_PHY_WRAPPER_GCC_USB30_PIPE_CLK 7 + +/* GCC clocks */ +#define GCC_AGGRE_NOC_PCIE_0_AXI_CLK 8 +#define GCC_AGGRE_NOC_PCIE_1_AXI_CLK 9 +#define GCC_AGGRE_UFS_PHY_AXI_CLK 10 +#define GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK 11 +#define GCC_AGGRE_USB3_PRIM_AXI_CLK 12 +#define GCC_ANOC_PCIE_PWRCTL_CLK 13 +#define GCC_BOOT_ROM_AHB_CLK 14 +#define GCC_CAMERA_AHB_CLK 15 +#define GCC_CAMERA_HF_AXI_CLK 16 +#define GCC_CAMERA_SF_AXI_CLK 17 +#define GCC_CAMERA_XO_CLK 18 +#define GCC_CFG_NOC_PCIE_ANOC_AHB_CLK 19 +#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK 20 +#define GCC_CPUSS_AHB_CLK 21 +#define GCC_CPUSS_AHB_CLK_SRC 22 +#define GCC_CPUSS_AHB_POSTDIV_CLK_SRC 23 +#define GCC_CPUSS_CONFIG_NOC_SF_CLK 24 +#define GCC_DDRSS_GPU_AXI_CLK 25 +#define GCC_DDRSS_PCIE_SF_TBU_CLK 26 +#define GCC_DISP_AHB_CLK 27 +#define GCC_DISP_HF_AXI_CLK 28 +#define GCC_DISP_SF_AXI_CLK 29 +#define GCC_DISP_XO_CLK 30 +#define GCC_EUSB3_0_CLKREF_EN 31 +#define GCC_GP1_CLK 32 +#define GCC_GP1_CLK_SRC 33 +#define GCC_GP2_CLK 34 +#define GCC_GP2_CLK_SRC 35 +#define GCC_GP3_CLK 36 +#define GCC_GP3_CLK_SRC 37 +#define GCC_GPLL0 38 +#define GCC_GPLL0_OUT_EVEN 39 +#define GCC_GPLL4 40 +#define GCC_GPLL9 41 +#define GCC_GPU_CFG_AHB_CLK 42 +#define GCC_GPU_GPLL0_CLK_SRC 43 +#define GCC_GPU_GPLL0_DIV_CLK_SRC 44 +#define GCC_GPU_MEMNOC_GFX_CLK 45 +#define GCC_GPU_SNOC_DVM_GFX_CLK 46 +#define GCC_PCIE_0_AUX_CLK 47 +#define GCC_PCIE_0_AUX_CLK_SRC 48 +#define GCC_PCIE_0_CFG_AHB_CLK 49 +#define GCC_PCIE_0_CLKREF_EN 50 +#define GCC_PCIE_0_MSTR_AXI_CLK 51 +#define GCC_PCIE_0_PHY_RCHNG_CLK 52 +#define GCC_PCIE_0_PHY_RCHNG_CLK_SRC 53 +#define GCC_PCIE_0_PIPE_CLK 54 +#define GCC_PCIE_0_PIPE_CLK_SRC 55 +#define GCC_PCIE_0_SLV_AXI_CLK 56 +#define GCC_PCIE_0_SLV_Q2A_AXI_CLK 57 +#define GCC_PCIE_1_AUX_CLK 58 +#define GCC_PCIE_1_AUX_CLK_SRC 59 +#define GCC_PCIE_1_CFG_AHB_CLK 60 +#define GCC_PCIE_1_CLKREF_EN 61 +#define GCC_PCIE_1_MSTR_AXI_CLK 62 +#define GCC_PCIE_1_PHY_AUX_CLK 63 +#define GCC_PCIE_1_PHY_AUX_CLK_SRC 64 +#define GCC_PCIE_1_PHY_RCHNG_CLK 65 +#define GCC_PCIE_1_PHY_RCHNG_CLK_SRC 66 +#define GCC_PCIE_1_PIPE_CLK 67 +#define GCC_PCIE_1_PIPE_CLK_SRC 68 +#define GCC_PCIE_1_SLV_AXI_CLK 69 +#define GCC_PCIE_1_SLV_Q2A_AXI_CLK 70 +#define GCC_PDM2_CLK 71 +#define GCC_PDM2_CLK_SRC 72 +#define GCC_PDM_AHB_CLK 73 +#define GCC_PDM_XO4_CLK 74 +#define GCC_QMIP_CAMERA_NRT_AHB_CLK 75 +#define GCC_QMIP_CAMERA_RT_AHB_CLK 76 +#define GCC_QMIP_DISP_AHB_CLK 77 +#define GCC_QMIP_GPU_AHB_CLK 78 +#define GCC_QMIP_PCIE_AHB_CLK 79 +#define GCC_QMIP_VIDEO_CV_CPU_AHB_CLK 80 +#define GCC_QMIP_VIDEO_CVP_AHB_CLK 81 +#define GCC_QMIP_VIDEO_V_CPU_AHB_CLK 82 +#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK 83 +#define GCC_QUPV3_WRAP0_CORE_2X_CLK 84 +#define GCC_QUPV3_WRAP0_CORE_CLK 85 +#define GCC_QUPV3_WRAP0_S0_CLK 86 +#define GCC_QUPV3_WRAP0_S0_CLK_SRC 87 +#define GCC_QUPV3_WRAP0_S1_CLK 88 +#define GCC_QUPV3_WRAP0_S1_CLK_SRC 89 +#define GCC_QUPV3_WRAP0_S2_CLK 90 +#define GCC_QUPV3_WRAP0_S2_CLK_SRC 91 +#define GCC_QUPV3_WRAP0_S3_CLK 92 +#define GCC_QUPV3_WRAP0_S3_CLK_SRC 93 +#define GCC_QUPV3_WRAP0_S4_CLK 94 +#define GCC_QUPV3_WRAP0_S4_CLK_SRC 95 +#define GCC_QUPV3_WRAP0_S5_CLK 96 +#define GCC_QUPV3_WRAP0_S5_CLK_SRC 97 +#define GCC_QUPV3_WRAP0_S6_CLK 98 +#define GCC_QUPV3_WRAP0_S6_CLK_SRC 99 +#define GCC_QUPV3_WRAP0_S7_CLK 100 +#define GCC_QUPV3_WRAP0_S7_CLK_SRC 101 +#define GCC_QUPV3_WRAP1_CORE_2X_CLK 102 +#define GCC_QUPV3_WRAP1_CORE_CLK 103 +#define GCC_QUPV3_WRAP1_S0_CLK 104 +#define GCC_QUPV3_WRAP1_S0_CLK_SRC 105 +#define GCC_QUPV3_WRAP1_S1_CLK 106 +#define GCC_QUPV3_WRAP1_S1_CLK_SRC 107 +#define GCC_QUPV3_WRAP1_S2_CLK 108 +#define GCC_QUPV3_WRAP1_S2_CLK_SRC 109 +#define GCC_QUPV3_WRAP1_S3_CLK 110 +#define GCC_QUPV3_WRAP1_S3_CLK_SRC 111 +#define GCC_QUPV3_WRAP1_S4_CLK 112 +#define GCC_QUPV3_WRAP1_S4_CLK_SRC 113 +#define GCC_QUPV3_WRAP1_S5_CLK 114 +#define GCC_QUPV3_WRAP1_S5_CLK_SRC 115 +#define GCC_QUPV3_WRAP1_S6_CLK 116 +#define GCC_QUPV3_WRAP1_S6_CLK_SRC 117 +#define GCC_QUPV3_WRAP2_CORE_2X_CLK 118 +#define GCC_QUPV3_WRAP2_CORE_CLK 119 +#define GCC_QUPV3_WRAP2_S0_CLK 120 +#define GCC_QUPV3_WRAP2_S0_CLK_SRC 121 +#define GCC_QUPV3_WRAP2_S1_CLK 122 +#define GCC_QUPV3_WRAP2_S1_CLK_SRC 123 +#define GCC_QUPV3_WRAP2_S2_CLK 124 +#define GCC_QUPV3_WRAP2_S2_CLK_SRC 125 +#define GCC_QUPV3_WRAP2_S3_CLK 126 +#define GCC_QUPV3_WRAP2_S3_CLK_SRC 127 +#define GCC_QUPV3_WRAP2_S4_CLK 128 +#define GCC_QUPV3_WRAP2_S4_CLK_SRC 129 +#define GCC_QUPV3_WRAP2_S5_CLK 130 +#define GCC_QUPV3_WRAP2_S5_CLK_SRC 131 +#define GCC_QUPV3_WRAP2_S6_CLK 132 +#define GCC_QUPV3_WRAP2_S6_CLK_SRC 133 +#define GCC_QUPV3_WRAP_0_M_AHB_CLK 134 +#define GCC_QUPV3_WRAP_0_S_AHB_CLK 135 +#define GCC_QUPV3_WRAP_1_M_AHB_CLK 136 +#define GCC_QUPV3_WRAP_1_S_AHB_CLK 137 +#define GCC_QUPV3_WRAP_2_M_AHB_CLK 138 +#define GCC_QUPV3_WRAP_2_S_AHB_CLK 139 +#define GCC_SDCC2_AHB_CLK 140 +#define GCC_SDCC2_APPS_CLK 141 +#define GCC_SDCC2_APPS_CLK_SRC 142 +#define GCC_SDCC2_AT_CLK 143 +#define GCC_SDCC4_AHB_CLK 144 +#define GCC_SDCC4_APPS_CLK 145 +#define GCC_SDCC4_APPS_CLK_SRC 146 +#define GCC_SDCC4_AT_CLK 147 +#define GCC_SYS_NOC_CPUSS_AHB_CLK 148 +#define GCC_UFS_0_CLKREF_EN 149 +#define GCC_UFS_PHY_AHB_CLK 150 +#define GCC_UFS_PHY_AXI_CLK 151 +#define GCC_UFS_PHY_AXI_CLK_SRC 152 +#define GCC_UFS_PHY_AXI_HW_CTL_CLK 153 +#define GCC_UFS_PHY_ICE_CORE_CLK 154 +#define GCC_UFS_PHY_ICE_CORE_CLK_SRC 155 +#define GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK 156 +#define GCC_UFS_PHY_PHY_AUX_CLK 157 +#define GCC_UFS_PHY_PHY_AUX_CLK_SRC 158 +#define GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK 159 +#define GCC_UFS_PHY_RX_SYMBOL_0_CLK 160 +#define GCC_UFS_PHY_RX_SYMBOL_0_CLK_SRC 161 +#define GCC_UFS_PHY_RX_SYMBOL_1_CLK 162 +#define GCC_UFS_PHY_RX_SYMBOL_1_CLK_SRC 163 +#define GCC_UFS_PHY_TX_SYMBOL_0_CLK 164 +#define GCC_UFS_PHY_TX_SYMBOL_0_CLK_SRC 165 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK 166 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC 167 +#define GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK 168 +#define GCC_USB30_PRIM_MASTER_CLK 169 +#define GCC_USB30_PRIM_MASTER_CLK_SRC 170 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK 171 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC 172 +#define GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC 173 +#define GCC_USB30_PRIM_SLEEP_CLK 174 +#define GCC_USB3_0_CLKREF_EN 175 +#define GCC_USB3_PRIM_PHY_AUX_CLK 176 +#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC 177 +#define GCC_USB3_PRIM_PHY_COM_AUX_CLK 178 +#define GCC_USB3_PRIM_PHY_PIPE_CLK 179 +#define GCC_USB3_PRIM_PHY_PIPE_CLK_SRC 180 +#define GCC_VIDEO_AHB_CLK 181 +#define GCC_VIDEO_AXI0_CLK 182 +#define GCC_VIDEO_AXI1_CLK 183 +#define GCC_VIDEO_XO_CLK 184 + +/* GCC resets */ +#define GCC_CAMERA_BCR 0 +#define GCC_DISPLAY_BCR 1 +#define GCC_GPU_BCR 2 +#define GCC_MMSS_BCR 3 +#define GCC_PCIE_0_BCR 4 +#define GCC_PCIE_0_LINK_DOWN_BCR 5 +#define GCC_PCIE_0_NOCSR_COM_PHY_BCR 6 +#define GCC_PCIE_0_PHY_BCR 7 +#define GCC_PCIE_0_PHY_NOCSR_COM_PHY_BCR 8 +#define GCC_PCIE_1_BCR 9 +#define GCC_PCIE_1_LINK_DOWN_BCR 10 +#define GCC_PCIE_1_NOCSR_COM_PHY_BCR 11 +#define GCC_PCIE_1_PHY_BCR 12 +#define GCC_PCIE_1_PHY_NOCSR_COM_PHY_BCR 13 +#define GCC_PCIE_PHY_BCR 14 +#define GCC_PCIE_PHY_CFG_AHB_BCR 15 +#define GCC_PCIE_PHY_COM_BCR 16 +#define GCC_PDM_BCR 17 +#define GCC_QUPV3_WRAPPER_0_BCR 18 +#define GCC_QUPV3_WRAPPER_1_BCR 19 +#define GCC_QUPV3_WRAPPER_2_BCR 20 +#define GCC_QUSB2PHY_PRIM_BCR 21 +#define GCC_QUSB2PHY_SEC_BCR 22 +#define GCC_SDCC2_BCR 23 +#define GCC_SDCC4_BCR 24 +#define GCC_UFS_PHY_BCR 25 +#define GCC_USB30_PRIM_BCR 26 +#define GCC_USB3_DP_PHY_PRIM_BCR 27 +#define GCC_USB3_DP_PHY_SEC_BCR 28 +#define GCC_USB3_PHY_PRIM_BCR 29 +#define GCC_USB3_PHY_SEC_BCR 30 +#define GCC_USB3PHY_PHY_PRIM_BCR 31 +#define GCC_USB3PHY_PHY_SEC_BCR 32 +#define GCC_USB_PHY_CFG_AHB2PHY_BCR 33 +#define GCC_VIDEO_AXI0_CLK_ARES 34 +#define GCC_VIDEO_AXI1_CLK_ARES 35 +#define GCC_VIDEO_BCR 36 + +/* GCC power domains */ +#define PCIE_0_GDSC 0 +#define PCIE_1_GDSC 1 +#define UFS_PHY_GDSC 2 +#define USB30_PRIM_GDSC 3 + +#endif -- cgit v1.2.3 From 061dbde2bf3b12d80a4efd4b40db0b272e55b7f5 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 15 Dec 2021 08:23:23 +0800 Subject: dt-bindings: interconnect: Add Qualcomm QCM2290 NoC support Add bindings for Qualcomm QCM2290 Network-On-Chip interconnect devices. Signed-off-by: Shawn Guo Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211215002324.1727-5-shawn.guo@linaro.org Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,qcm2290.h | 94 +++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,qcm2290.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,qcm2290.h b/include/dt-bindings/interconnect/qcom,qcm2290.h new file mode 100644 index 000000000000..6cbbb7fe0bd3 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,qcm2290.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* QCM2290 interconnect IDs */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCM2290_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_QCM2290_H + +/* BIMC */ +#define MASTER_APPSS_PROC 0 +#define MASTER_SNOC_BIMC_RT 1 +#define MASTER_SNOC_BIMC_NRT 2 +#define MASTER_SNOC_BIMC 3 +#define MASTER_TCU_0 4 +#define MASTER_GFX3D 5 +#define SLAVE_EBI1 6 +#define SLAVE_BIMC_SNOC 7 + +/* CNOC */ +#define MASTER_SNOC_CNOC 0 +#define MASTER_QDSS_DAP 1 +#define SLAVE_BIMC_CFG 2 +#define SLAVE_CAMERA_NRT_THROTTLE_CFG 3 +#define SLAVE_CAMERA_RT_THROTTLE_CFG 4 +#define SLAVE_CAMERA_CFG 5 +#define SLAVE_CLK_CTL 6 +#define SLAVE_CRYPTO_0_CFG 7 +#define SLAVE_DISPLAY_CFG 8 +#define SLAVE_DISPLAY_THROTTLE_CFG 9 +#define SLAVE_GPU_CFG 10 +#define SLAVE_HWKM 11 +#define SLAVE_IMEM_CFG 12 +#define SLAVE_IPA_CFG 13 +#define SLAVE_LPASS 14 +#define SLAVE_MESSAGE_RAM 15 +#define SLAVE_PDM 16 +#define SLAVE_PIMEM_CFG 17 +#define SLAVE_PKA_WRAPPER 18 +#define SLAVE_PMIC_ARB 19 +#define SLAVE_PRNG 20 +#define SLAVE_QDSS_CFG 21 +#define SLAVE_QM_CFG 22 +#define SLAVE_QM_MPU_CFG 23 +#define SLAVE_QPIC 24 +#define SLAVE_QUP_0 25 +#define SLAVE_SDCC_1 26 +#define SLAVE_SDCC_2 27 +#define SLAVE_SNOC_CFG 28 +#define SLAVE_TCSR 29 +#define SLAVE_USB3 30 +#define SLAVE_VENUS_CFG 31 +#define SLAVE_VENUS_THROTTLE_CFG 32 +#define SLAVE_VSENSE_CTRL_CFG 33 +#define SLAVE_SERVICE_CNOC 34 + +/* SNOC */ +#define MASTER_CRYPTO_CORE0 0 +#define MASTER_SNOC_CFG 1 +#define MASTER_TIC 2 +#define MASTER_ANOC_SNOC 3 +#define MASTER_BIMC_SNOC 4 +#define MASTER_PIMEM 5 +#define MASTER_QDSS_BAM 6 +#define MASTER_QUP_0 7 +#define MASTER_IPA 8 +#define MASTER_QDSS_ETR 9 +#define MASTER_SDCC_1 10 +#define MASTER_SDCC_2 11 +#define MASTER_QPIC 12 +#define MASTER_USB3_0 13 +#define SLAVE_APPSS 14 +#define SLAVE_SNOC_CNOC 15 +#define SLAVE_IMEM 16 +#define SLAVE_PIMEM 17 +#define SLAVE_SNOC_BIMC 18 +#define SLAVE_SERVICE_SNOC 19 +#define SLAVE_QDSS_STM 20 +#define SLAVE_TCU 21 +#define SLAVE_ANOC_SNOC 22 + +/* QUP Virtual */ +#define MASTER_QUP_CORE_0 0 +#define SLAVE_QUP_CORE_0 1 + +/* MMNRT Virtual */ +#define MASTER_CAMNOC_SF 0 +#define MASTER_VIDEO_P0 1 +#define MASTER_VIDEO_PROC 2 +#define SLAVE_SNOC_BIMC_NRT 3 + +/* MMRT Virtual */ +#define MASTER_CAMNOC_HF 0 +#define MASTER_MDP0 1 +#define SLAVE_SNOC_BIMC_RT 2 + +#endif -- cgit v1.2.3 From ad69cd9972e79aba103ba5365de0acd35770c265 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:27 +0200 Subject: fsnotify: clarify object type argument In preparation for separating object type from iterator type, rename some 'type' arguments in functions to 'obj_type' and remove the unused interface to clear marks by object type mask. Link: https://lore.kernel.org/r/20211129201537.1932819-2-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 51ef2b079bfa..b9c84b1dbcc8 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -338,6 +338,7 @@ static inline struct fs_error_report *fsnotify_data_error_report( } enum fsnotify_obj_type { + FSNOTIFY_OBJ_TYPE_ANY = -1, FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, @@ -346,15 +347,9 @@ enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; -#define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) -#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT) -#define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) -#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) -#define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) - -static inline bool fsnotify_valid_obj_type(unsigned int type) +static inline bool fsnotify_valid_obj_type(unsigned int obj_type) { - return (type < FSNOTIFY_OBJ_TYPE_COUNT); + return (obj_type < FSNOTIFY_OBJ_TYPE_COUNT); } struct fsnotify_iter_info { @@ -387,7 +382,7 @@ static inline void fsnotify_iter_set_report_type_mark( static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ struct fsnotify_iter_info *iter_info) \ { \ - return (iter_info->report_mask & FSNOTIFY_OBJ_TYPE_##NAME##_FL) ? \ + return (iter_info->report_mask & (1U << FSNOTIFY_OBJ_TYPE_##NAME)) ? \ iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \ } @@ -604,11 +599,11 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, __kernel_fsid_t *fsid); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int type, + fsnotify_connp_t *connp, unsigned int obj_type, int allow_dups, __kernel_fsid_t *fsid); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int type, int allow_dups, + unsigned int obj_type, int allow_dups, __kernel_fsid_t *fsid); /* attach the mark to the inode */ @@ -637,22 +632,23 @@ extern void fsnotify_detach_mark(struct fsnotify_mark *mark); extern void fsnotify_free_mark(struct fsnotify_mark *mark); /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); -/* run all the marks in a group, and clear all of the marks attached to given object type */ -extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); +/* Clear all of the marks of a group attached to a given object type */ +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, + unsigned int obj_type); /* run all the marks in a group, and clear all of the vfsmount marks */ static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); } /* run all the marks in a group, and clear all of the inode marks */ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); } /* run all the marks in a group, and clear all of the sn marks */ static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); } extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); -- cgit v1.2.3 From 1c9007d62bea6fd164285314f7553f73e5308863 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:28 +0200 Subject: fsnotify: separate mark iterator type from object type enum They are two different types that use the same enum, so this confusing. Use the object type to indicate the type of object mark is attached to and the iter type to indicate the type of watch. A group can have two different watches of the same object type (parent and child watches) that match the same event. Link: https://lore.kernel.org/r/20211129201537.1932819-3-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 41 +++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b9c84b1dbcc8..73739fee1710 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -337,10 +337,25 @@ static inline struct fs_error_report *fsnotify_data_error_report( } } +/* + * Index to merged marks iterator array that correlates to a type of watch. + * The type of watched object can be deduced from the iterator type, but not + * the other way around, because an event can match different watched objects + * of the same object type. + * For example, both parent and child are watching an object of type inode. + */ +enum fsnotify_iter_type { + FSNOTIFY_ITER_TYPE_INODE, + FSNOTIFY_ITER_TYPE_VFSMOUNT, + FSNOTIFY_ITER_TYPE_SB, + FSNOTIFY_ITER_TYPE_PARENT, + FSNOTIFY_ITER_TYPE_COUNT +}; + +/* The type of object that a mark is attached to */ enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_ANY = -1, FSNOTIFY_OBJ_TYPE_INODE, - FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, @@ -353,37 +368,37 @@ static inline bool fsnotify_valid_obj_type(unsigned int obj_type) } struct fsnotify_iter_info { - struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT]; + struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT]; unsigned int report_mask; int srcu_idx; }; static inline bool fsnotify_iter_should_report_type( - struct fsnotify_iter_info *iter_info, int type) + struct fsnotify_iter_info *iter_info, int iter_type) { - return (iter_info->report_mask & (1U << type)); + return (iter_info->report_mask & (1U << iter_type)); } static inline void fsnotify_iter_set_report_type( - struct fsnotify_iter_info *iter_info, int type) + struct fsnotify_iter_info *iter_info, int iter_type) { - iter_info->report_mask |= (1U << type); + iter_info->report_mask |= (1U << iter_type); } static inline void fsnotify_iter_set_report_type_mark( - struct fsnotify_iter_info *iter_info, int type, + struct fsnotify_iter_info *iter_info, int iter_type, struct fsnotify_mark *mark) { - iter_info->marks[type] = mark; - iter_info->report_mask |= (1U << type); + iter_info->marks[iter_type] = mark; + iter_info->report_mask |= (1U << iter_type); } #define FSNOTIFY_ITER_FUNCS(name, NAME) \ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ struct fsnotify_iter_info *iter_info) \ { \ - return (iter_info->report_mask & (1U << FSNOTIFY_OBJ_TYPE_##NAME)) ? \ - iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \ + return (iter_info->report_mask & (1U << FSNOTIFY_ITER_TYPE_##NAME)) ? \ + iter_info->marks[FSNOTIFY_ITER_TYPE_##NAME] : NULL; \ } FSNOTIFY_ITER_FUNCS(inode, INODE) @@ -391,8 +406,8 @@ FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) -#define fsnotify_foreach_obj_type(type) \ - for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++) +#define fsnotify_foreach_iter_type(type) \ + for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++) /* * fsnotify_connp_t is what we embed in objects which connector can be attached -- cgit v1.2.3 From d61fd650e9d206a71fda789f02a1ced4b19944c4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:29 +0200 Subject: fanotify: introduce group flag FAN_REPORT_TARGET_FID FAN_REPORT_FID is ambiguous in that it reports the fid of the child for some events and the fid of the parent for create/delete/move events. The new FAN_REPORT_TARGET_FID flag is an implicit request to report the fid of the target object of the operation (a.k.a the child inode) also in create/delete/move events in addition to the fid of the parent and the name of the child. To reduce the test matrix for uninteresting use cases, the new FAN_REPORT_TARGET_FID flag requires both FAN_REPORT_NAME and FAN_REPORT_FID. The convenience macro FAN_REPORT_DFID_NAME_TARGET combines FAN_REPORT_TARGET_FID with all the required flags. Link: https://lore.kernel.org/r/20211129201537.1932819-4-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 2 +- include/uapi/linux/fanotify.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 616af2ea20f3..376e050e6f38 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ #define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES) -#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) +#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) #define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index bd1932c2074d..60f73639a896 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -57,9 +57,13 @@ #define FAN_REPORT_FID 0x00000200 /* Report unique file id */ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ -- cgit v1.2.3 From e54183fa7047c15819bc155f4c58501d9a9a3489 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:30 +0200 Subject: fsnotify: generate FS_RENAME event with rich information The dnotify FS_DN_RENAME event is used to request notification about a move within the same parent directory and was always coupled with the FS_MOVED_FROM event. Rename the FS_DN_RENAME event flag to FS_RENAME, decouple it from FS_MOVED_FROM and report it with the moved dentry instead of the moved inode, so it has the information about both old and new parent and name. Generate the FS_RENAME event regardless of same parent dir and apply the "same parent" rule in the generic fsnotify_handle_event() helper that is used to call backends with ->handle_inode_event() method (i.e. dnotify). The ->handle_inode_event() method is not rich enough to report both old and new parent and name anyway. The enriched event is reported to fanotify over the ->handle_event() method with the old and new dir inode marks in marks array slots for ITER_TYPE_INODE and a new iter type slot ITER_TYPE_INODE2. The enriched event will be used for reporting old and new parent+name to fanotify groups with FAN_RENAME events. Link: https://lore.kernel.org/r/20211129201537.1932819-5-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/dnotify.h | 2 +- include/linux/fsnotify.h | 9 ++++++--- include/linux/fsnotify_backend.h | 7 ++++--- 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index 0aad774beaec..b87c3b85a166 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -26,7 +26,7 @@ struct dnotify_struct { FS_MODIFY | FS_MODIFY_CHILD |\ FS_ACCESS | FS_ACCESS_CHILD |\ FS_ATTRIB | FS_ATTRIB_CHILD |\ - FS_CREATE | FS_DN_RENAME |\ + FS_CREATE | FS_RENAME |\ FS_MOVED_FROM | FS_MOVED_TO) extern int dir_notify_enable; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 787545e87eeb..3a2d7dc3c607 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -144,16 +144,19 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = FS_MOVED_FROM; __u32 new_dir_mask = FS_MOVED_TO; + __u32 rename_mask = FS_RENAME; const struct qstr *new_name = &moved->d_name; - if (old_dir == new_dir) - old_dir_mask |= FS_DN_RENAME; - if (isdir) { old_dir_mask |= FS_ISDIR; new_dir_mask |= FS_ISDIR; + rename_mask |= FS_ISDIR; } + /* Event with information about both old and new parent+name */ + fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY, + old_dir, old_name, 0); + fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_dir, old_name, fs_cookie); fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE, diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 73739fee1710..790c31844db5 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -63,7 +63,7 @@ */ #define FS_EVENT_ON_CHILD 0x08000000 -#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_RENAME 0x10000000 /* File was renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ #define FS_ISDIR 0x40000000 /* event occurred against dir */ #define FS_IN_ONESHOT 0x80000000 /* only send event once */ @@ -76,7 +76,7 @@ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ -#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) @@ -101,7 +101,7 @@ /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ - FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \ + FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ FS_ERROR) @@ -349,6 +349,7 @@ enum fsnotify_iter_type { FSNOTIFY_ITER_TYPE_VFSMOUNT, FSNOTIFY_ITER_TYPE_SB, FSNOTIFY_ITER_TYPE_PARENT, + FSNOTIFY_ITER_TYPE_INODE2, FSNOTIFY_ITER_TYPE_COUNT }; -- cgit v1.2.3 From 3982534ba5ce45e890b2f5ef5e7372c1accd14c7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:34 +0200 Subject: fanotify: record old and new parent and name in FAN_RENAME event In the special case of FAN_RENAME event, we record both the old and new parent and name. Link: https://lore.kernel.org/r/20211129201537.1932819-9-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 60f73639a896..9d0e2dc5767b 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -28,6 +28,8 @@ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ +#define FAN_RENAME 0x10000000 /* File was renamed */ + #define FAN_ONDIR 0x40000000 /* Event occurred against dir */ /* helper events */ -- cgit v1.2.3 From 7326e382c21e9c23c89c88369afdc90b82a14da8 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:36 +0200 Subject: fanotify: report old and/or new parent+name in FAN_RENAME event In the special case of FAN_RENAME event, we report old or new or both old and new parent+name. A single info record will be reported if either the old or new dir is watched and two records will be reported if both old and new dir (or their filesystem) are watched. The old and new parent+name are reported using new info record types FAN_EVENT_INFO_TYPE_{OLD,NEW}_DFID_NAME, so if a single info record is reported, it is clear to the application, to which dir entry the fid+name info is referring to. Link: https://lore.kernel.org/r/20211129201537.1932819-11-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 9d0e2dc5767b..e8ac38cc2fd6 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -134,6 +134,12 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_PIDFD 4 #define FAN_EVENT_INFO_TYPE_ERROR 5 +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ + /* Variable length info record following event metadata */ struct fanotify_event_info_header { __u8 info_type; -- cgit v1.2.3 From 8cc3b1ccd930fe6971e1527f0c4f1bdc8cb56026 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:37 +0200 Subject: fanotify: wire up FAN_RENAME event FAN_RENAME is the successor of FAN_MOVED_FROM and FAN_MOVED_TO and can be used to get the old and new parent+name information in a single event. FAN_MOVED_FROM and FAN_MOVED_TO are still supported for backward compatibility, but it makes little sense to use them together with FAN_RENAME in the same group. FAN_RENAME uses special info type records to report the old and new parent+name, so reporting only old and new parent id is less useful and was not implemented. Therefore, FAN_REANAME requires a group with flag FAN_REPORT_NAME. Link: https://lore.kernel.org/r/20211129201537.1932819-12-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 376e050e6f38..3afdf339d53c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -82,7 +82,8 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. */ -#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ + FAN_RENAME) /* Events that can be reported with event->fd */ #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS) -- cgit v1.2.3 From f1d9268e061863ead77b07f5a6807d063e28a1c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Dec 2021 07:09:33 -0800 Subject: net: add net device refcount tracker to struct packet_type Most notable changes are in af_packet, tipc ones are trivial. Signed-off-by: Eric Dumazet Cc: Jon Maloy Cc: Ying Xue Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c06e9dc1a317..a419718612c6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2533,6 +2533,7 @@ struct packet_type { __be16 type; /* This is really htons(ether_type). */ bool ignore_outgoing; struct net_device *dev; /* NULL is wildcarded here */ + netdevice_tracker dev_tracker; int (*func) (struct sk_buff *, struct net_device *, struct packet_type *, -- cgit v1.2.3 From 685b1afd7911676691c4167f420e16a957f5a38e Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 9 Dec 2021 12:09:23 +0200 Subject: net/mlx5: Introduce log_max_current_uc_list_wr_supported bit Downstream patch will use this bit in order to know whether the device supports changing of max_uc_list. Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3636df90899a..d3899fc33fd7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1621,7 +1621,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ext_stride_num_range[0x1]; u8 roce_rw_supported[0x1]; - u8 reserved_at_3a2[0x1]; + u8 log_max_current_uc_list_wr_supported[0x1]; u8 log_max_stride_sz_rq[0x5]; u8 reserved_at_3a8[0x3]; u8 log_min_stride_sz_rq[0x5]; -- cgit v1.2.3 From fb82437fdd8cd8ac41b1265e40a96668e33c3a8d Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 18 Nov 2021 16:13:00 +0200 Subject: PCI: Change capability register offsets to hex Convert offsets of capability registers from decimal to hex. This matches the spec documents and is less error prone. [bhelgaas: also convert other capabilities with offsets > 8] Suggested-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20210825160516.GA3576414@bjorn-Precision-5520/ Link: https://lore.kernel.org/r/aa067278adacbb59a675366052714081f4980f26.1637244780.git.baruch@tkos.co.il Signed-off-by: Baruch Siach Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 138 +++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index ff6ccbc6efe9..fe86f5310d76 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -301,23 +301,23 @@ #define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ #define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ -/* Message Signalled Interrupt registers */ +/* Message Signaled Interrupt registers */ -#define PCI_MSI_FLAGS 2 /* Message Control */ +#define PCI_MSI_FLAGS 0x02 /* Message Control */ #define PCI_MSI_FLAGS_ENABLE 0x0001 /* MSI feature enabled */ #define PCI_MSI_FLAGS_QMASK 0x000e /* Maximum queue size available */ #define PCI_MSI_FLAGS_QSIZE 0x0070 /* Message queue size configured */ #define PCI_MSI_FLAGS_64BIT 0x0080 /* 64-bit addresses allowed */ #define PCI_MSI_FLAGS_MASKBIT 0x0100 /* Per-vector masking capable */ #define PCI_MSI_RFU 3 /* Rest of capability flags */ -#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ -#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ -#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ -#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */ -#define PCI_MSI_PENDING_32 16 /* Pending intrs for 32-bit devices */ -#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ -#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ -#define PCI_MSI_PENDING_64 20 /* Pending intrs for 64-bit devices */ +#define PCI_MSI_ADDRESS_LO 0x04 /* Lower 32 bits */ +#define PCI_MSI_ADDRESS_HI 0x08 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +#define PCI_MSI_DATA_32 0x08 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_MASK_32 0x0c /* Mask bits register for 32-bit devices */ +#define PCI_MSI_PENDING_32 0x10 /* Pending intrs for 32-bit devices */ +#define PCI_MSI_DATA_64 0x0c /* 16 bits of data for 64-bit devices */ +#define PCI_MSI_MASK_64 0x10 /* Mask bits register for 64-bit devices */ +#define PCI_MSI_PENDING_64 0x14 /* Pending intrs for 64-bit devices */ /* MSI-X registers (in MSI-X capability) */ #define PCI_MSIX_FLAGS 2 /* Message Control */ @@ -335,10 +335,10 @@ /* MSI-X Table entry format (in memory mapped by a BAR) */ #define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR 0 /* Message Address */ -#define PCI_MSIX_ENTRY_UPPER_ADDR 4 /* Message Upper Address */ -#define PCI_MSIX_ENTRY_DATA 8 /* Message Data */ -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 /* Vector Control */ +#define PCI_MSIX_ENTRY_LOWER_ADDR 0x0 /* Message Address */ +#define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */ +#define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */ +#define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */ #define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 /* CompactPCI Hotswap Register */ @@ -470,7 +470,7 @@ /* PCI Express capability registers */ -#define PCI_EXP_FLAGS 2 /* Capabilities register */ +#define PCI_EXP_FLAGS 0x02 /* Capabilities register */ #define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ #define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ #define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ @@ -484,7 +484,7 @@ #define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ -#define PCI_EXP_DEVCAP 4 /* Device capabilities */ +#define PCI_EXP_DEVCAP 0x04 /* Device capabilities */ #define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */ #define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */ #define PCI_EXP_DEVCAP_EXT_TAG 0x00000020 /* Extended tags */ @@ -497,7 +497,7 @@ #define PCI_EXP_DEVCAP_PWR_VAL 0x03fc0000 /* Slot Power Limit Value */ #define PCI_EXP_DEVCAP_PWR_SCL 0x0c000000 /* Slot Power Limit Scale */ #define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ -#define PCI_EXP_DEVCTL 8 /* Device Control */ +#define PCI_EXP_DEVCTL 0x08 /* Device Control */ #define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ #define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ #define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ @@ -522,7 +522,7 @@ #define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */ #define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */ #define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ -#define PCI_EXP_DEVSTA 10 /* Device Status */ +#define PCI_EXP_DEVSTA 0x0a /* Device Status */ #define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */ #define PCI_EXP_DEVSTA_NFED 0x0002 /* Non-Fatal Error Detected */ #define PCI_EXP_DEVSTA_FED 0x0004 /* Fatal Error Detected */ @@ -530,7 +530,7 @@ #define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */ #define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */ -#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ +#define PCI_EXP_LNKCAP 0x0c /* Link Capabilities */ #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ #define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ #define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ @@ -549,7 +549,7 @@ #define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ #define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ #define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ -#define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKCTL 0x10 /* Link Control */ #define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ #define PCI_EXP_LNKCTL_ASPM_L0S 0x0001 /* L0s Enable */ #define PCI_EXP_LNKCTL_ASPM_L1 0x0002 /* L1 Enable */ @@ -562,7 +562,7 @@ #define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ #define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ #define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */ -#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_LNKSTA 0x12 /* Link Status */ #define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ #define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ @@ -582,7 +582,7 @@ #define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ #define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints with link end here */ -#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCAP 0x14 /* Slot Capabilities */ #define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ #define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ #define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ @@ -595,7 +595,7 @@ #define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ #define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ #define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ -#define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTCTL 0x18 /* Slot Control */ #define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ #define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ #define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ @@ -617,7 +617,7 @@ #define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ #define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ #define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */ -#define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_SLTSTA 0x1a /* Slot Status */ #define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ #define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ #define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ @@ -627,15 +627,15 @@ #define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ #define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ #define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ -#define PCI_EXP_RTCTL 28 /* Root Control */ +#define PCI_EXP_RTCTL 0x1c /* Root Control */ #define PCI_EXP_RTCTL_SECEE 0x0001 /* System Error on Correctable Error */ #define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */ #define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */ #define PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */ #define PCI_EXP_RTCTL_CRSSVE 0x0010 /* CRS Software Visibility Enable */ -#define PCI_EXP_RTCAP 30 /* Root Capabilities */ +#define PCI_EXP_RTCAP 0x1e /* Root Capabilities */ #define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ -#define PCI_EXP_RTSTA 32 /* Root Status */ +#define PCI_EXP_RTSTA 0x20 /* Root Status */ #define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ #define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ /* @@ -646,7 +646,7 @@ * Use pcie_capability_read_word() and similar interfaces to use them * safely. */ -#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCAP2 0x24 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_COMP_TMOUT_DIS 0x00000010 /* Completion Timeout Disable supported */ #define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ #define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */ @@ -658,7 +658,7 @@ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ #define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */ -#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ +#define PCI_EXP_DEVCTL2 0x28 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ @@ -670,9 +670,9 @@ #define PCI_EXP_DEVCTL2_OBFF_MSGA_EN 0x2000 /* Enable OBFF Message type A */ #define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */ #define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ -#define PCI_EXP_DEVSTA2 42 /* Device Status 2 */ -#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */ -#define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */ +#define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */ +#define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ @@ -680,7 +680,7 @@ #define PCI_EXP_LNKCAP2_SLS_32_0GB 0x00000020 /* Supported Speed 32GT/s */ #define PCI_EXP_LNKCAP2_SLS_64_0GB 0x00000040 /* Supported Speed 64GT/s */ #define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ -#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ +#define PCI_EXP_LNKCTL2 0x30 /* Link Control 2 */ #define PCI_EXP_LNKCTL2_TLS 0x000f #define PCI_EXP_LNKCTL2_TLS_2_5GT 0x0001 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ @@ -691,12 +691,12 @@ #define PCI_EXP_LNKCTL2_ENTER_COMP 0x0010 /* Enter Compliance */ #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ -#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ -#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ +#define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ +#define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ -#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ -#define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ +#define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */ +#define PCI_EXP_SLTSTA2 0x3a /* Slot Status 2 */ /* Extended Capabilities (PCI-X 2.0 and Express) */ #define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) @@ -742,7 +742,7 @@ #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 /* Advanced Error Reporting */ -#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ +#define PCI_ERR_UNCOR_STATUS 0x04 /* Uncorrectable Error Status */ #define PCI_ERR_UNC_UND 0x00000001 /* Undefined */ #define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ #define PCI_ERR_UNC_SURPDN 0x00000020 /* Surprise Down */ @@ -760,11 +760,11 @@ #define PCI_ERR_UNC_MCBTLP 0x00800000 /* MC blocked TLP */ #define PCI_ERR_UNC_ATOMEG 0x01000000 /* Atomic egress blocked */ #define PCI_ERR_UNC_TLPPRE 0x02000000 /* TLP prefix blocked */ -#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ +#define PCI_ERR_UNCOR_MASK 0x08 /* Uncorrectable Error Mask */ /* Same bits as above */ -#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ +#define PCI_ERR_UNCOR_SEVER 0x0c /* Uncorrectable Error Severity */ /* Same bits as above */ -#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ +#define PCI_ERR_COR_STATUS 0x10 /* Correctable Error Status */ #define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ #define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ #define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ @@ -773,20 +773,20 @@ #define PCI_ERR_COR_ADV_NFAT 0x00002000 /* Advisory Non-Fatal */ #define PCI_ERR_COR_INTERNAL 0x00004000 /* Corrected Internal */ #define PCI_ERR_COR_LOG_OVER 0x00008000 /* Header Log Overflow */ -#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ +#define PCI_ERR_COR_MASK 0x14 /* Correctable Error Mask */ /* Same bits as above */ -#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ -#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ +#define PCI_ERR_CAP 0x18 /* Advanced Error Capabilities & Ctrl*/ +#define PCI_ERR_CAP_FEP(x) ((x) & 0x1f) /* First Error Pointer */ #define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ #define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ #define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ #define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ -#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ -#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ +#define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */ +#define PCI_ERR_ROOT_COMMAND 0x2c /* Root Error Command */ #define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ #define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */ #define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* Fatal Err Reporting Enable */ -#define PCI_ERR_ROOT_STATUS 48 +#define PCI_ERR_ROOT_STATUS 0x30 #define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ #define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */ #define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */ @@ -795,52 +795,52 @@ #define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ #define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ #define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */ -#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ +#define PCI_ERR_ROOT_ERR_SRC 0x34 /* Error Source Identification */ /* Virtual Channel */ -#define PCI_VC_PORT_CAP1 4 +#define PCI_VC_PORT_CAP1 0x04 #define PCI_VC_CAP1_EVCC 0x00000007 /* extended VC count */ #define PCI_VC_CAP1_LPEVCC 0x00000070 /* low prio extended VC count */ #define PCI_VC_CAP1_ARB_SIZE 0x00000c00 -#define PCI_VC_PORT_CAP2 8 +#define PCI_VC_PORT_CAP2 0x08 #define PCI_VC_CAP2_32_PHASE 0x00000002 #define PCI_VC_CAP2_64_PHASE 0x00000004 #define PCI_VC_CAP2_128_PHASE 0x00000008 #define PCI_VC_CAP2_ARB_OFF 0xff000000 -#define PCI_VC_PORT_CTRL 12 +#define PCI_VC_PORT_CTRL 0x0c #define PCI_VC_PORT_CTRL_LOAD_TABLE 0x00000001 -#define PCI_VC_PORT_STATUS 14 +#define PCI_VC_PORT_STATUS 0x0e #define PCI_VC_PORT_STATUS_TABLE 0x00000001 -#define PCI_VC_RES_CAP 16 +#define PCI_VC_RES_CAP 0x10 #define PCI_VC_RES_CAP_32_PHASE 0x00000002 #define PCI_VC_RES_CAP_64_PHASE 0x00000004 #define PCI_VC_RES_CAP_128_PHASE 0x00000008 #define PCI_VC_RES_CAP_128_PHASE_TB 0x00000010 #define PCI_VC_RES_CAP_256_PHASE 0x00000020 #define PCI_VC_RES_CAP_ARB_OFF 0xff000000 -#define PCI_VC_RES_CTRL 20 +#define PCI_VC_RES_CTRL 0x14 #define PCI_VC_RES_CTRL_LOAD_TABLE 0x00010000 #define PCI_VC_RES_CTRL_ARB_SELECT 0x000e0000 #define PCI_VC_RES_CTRL_ID 0x07000000 #define PCI_VC_RES_CTRL_ENABLE 0x80000000 -#define PCI_VC_RES_STATUS 26 +#define PCI_VC_RES_STATUS 0x1a #define PCI_VC_RES_STATUS_TABLE 0x00000001 #define PCI_VC_RES_STATUS_NEGO 0x00000002 #define PCI_CAP_VC_BASE_SIZEOF 0x10 -#define PCI_CAP_VC_PER_VC_SIZEOF 0x0C +#define PCI_CAP_VC_PER_VC_SIZEOF 0x0c /* Power Budgeting */ -#define PCI_PWR_DSR 4 /* Data Select Register */ -#define PCI_PWR_DATA 8 /* Data Register */ +#define PCI_PWR_DSR 0x04 /* Data Select Register */ +#define PCI_PWR_DATA 0x08 /* Data Register */ #define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ #define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ #define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ #define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ #define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ #define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ -#define PCI_PWR_CAP 12 /* Capability */ +#define PCI_PWR_CAP 0x0c /* Capability */ #define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ -#define PCI_EXT_CAP_PWR_SIZEOF 16 +#define PCI_EXT_CAP_PWR_SIZEOF 0x10 /* Root Complex Event Collector Endpoint Association */ #define PCI_RCEC_RCIEP_BITMAP 4 /* Associated Bitmap for RCiEPs */ @@ -964,7 +964,7 @@ #define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ #define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ #define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ -#define PCI_EXT_CAP_SRIOV_SIZEOF 64 +#define PCI_EXT_CAP_SRIOV_SIZEOF 0x40 #define PCI_LTR_MAX_SNOOP_LAT 0x4 #define PCI_LTR_MAX_NOSNOOP_LAT 0x6 @@ -1017,12 +1017,12 @@ #define PCI_TPH_LOC_NONE 0x000 /* no location */ #define PCI_TPH_LOC_CAP 0x200 /* in capability */ #define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ -#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* st table mask */ -#define PCI_TPH_CAP_ST_SHIFT 16 /* st table shift */ -#define PCI_TPH_BASE_SIZEOF 12 /* size with no st table */ +#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */ +#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */ +#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */ /* Downstream Port Containment */ -#define PCI_EXP_DPC_CAP 4 /* DPC Capability */ +#define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */ #define PCI_EXP_DPC_IRQ 0x001F /* Interrupt Message Number */ #define PCI_EXP_DPC_CAP_RP_EXT 0x0020 /* Root Port Extensions */ #define PCI_EXP_DPC_CAP_POISONED_TLP 0x0040 /* Poisoned TLP Egress Blocking Supported */ @@ -1030,19 +1030,19 @@ #define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size */ #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ -#define PCI_EXP_DPC_CTL 6 /* DPC control */ +#define PCI_EXP_DPC_CTL 0x06 /* DPC control */ #define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ #define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ #define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */ -#define PCI_EXP_DPC_STATUS 8 /* DPC Status */ +#define PCI_EXP_DPC_STATUS 0x08 /* DPC Status */ #define PCI_EXP_DPC_STATUS_TRIGGER 0x0001 /* Trigger Status */ #define PCI_EXP_DPC_STATUS_TRIGGER_RSN 0x0006 /* Trigger Reason */ #define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */ #define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */ #define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */ -#define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ +#define PCI_EXP_DPC_SOURCE_ID 0x0A /* DPC Source Identifier */ #define PCI_EXP_DPC_RP_PIO_STATUS 0x0C /* RP PIO Status */ #define PCI_EXP_DPC_RP_PIO_MASK 0x10 /* RP PIO Mask */ -- cgit v1.2.3 From ff5f87cb6a75dbf6d30668d2464e46249dd5c47f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 3 Dec 2021 11:28:49 +0100 Subject: clk: Introduce clk-tps68470 driver The TPS68470 PMIC provides Clocks, GPIOs and Regulators. At present in the kernel the Regulators and Clocks are controlled by an OpRegion driver designed to work with power control methods defined in ACPI, but some platforms lack those methods, meaning drivers need to be able to consume the resources of these chips through the usual frameworks. This commit adds a driver for the clocks provided by the tps68470, and is designed to bind to the platform_device registered by the intel_skl_int3472 module. This is based on this out of tree driver written by Intel: https://github.com/intel/linux-intel-lts/blob/4.14/base/drivers/clk/clk-tps68470.c with various cleanups added. Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211203102857.44539-7-hdegoede@redhat.com Signed-off-by: Stephen Boyd --- include/linux/mfd/tps68470.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/tps68470.h b/include/linux/mfd/tps68470.h index ffe81127d91c..7807fa329db0 100644 --- a/include/linux/mfd/tps68470.h +++ b/include/linux/mfd/tps68470.h @@ -75,6 +75,17 @@ #define TPS68470_CLKCFG1_MODE_A_MASK GENMASK(1, 0) #define TPS68470_CLKCFG1_MODE_B_MASK GENMASK(3, 2) +#define TPS68470_CLKCFG2_DRV_STR_2MA 0x05 +#define TPS68470_PLL_OUTPUT_ENABLE 0x02 +#define TPS68470_CLK_SRC_XTAL BIT(0) +#define TPS68470_PLLSWR_DEFAULT GENMASK(1, 0) +#define TPS68470_OSC_EXT_CAP_DEFAULT 0x05 + +#define TPS68470_OUTPUT_A_SHIFT 0x00 +#define TPS68470_OUTPUT_B_SHIFT 0x02 +#define TPS68470_CLK_SRC_SHIFT GENMASK(2, 0) +#define TPS68470_OSC_EXT_CAP_SHIFT BIT(2) + #define TPS68470_GPIO_CTL_REG_A(x) (TPS68470_REG_GPCTL0A + (x) * 2) #define TPS68470_GPIO_CTL_REG_B(x) (TPS68470_REG_GPCTL0B + (x) * 2) #define TPS68470_GPIO_MODE_MASK GENMASK(1, 0) -- cgit v1.2.3 From fe415186b43df0db1f17fa3a46275fd92107fe71 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: xen/console: harden hvc_xen against event channel storms The Xen console driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using a lateeoi event channel. For the normal domU initial console this requires the introduction of bind_evtchn_to_irq_lateeoi() as there is no xenbus device available at the time the event channel is bound to the irq. As the decision whether an interrupt was spurious or not requires to test for bytes having been read from the backend, move sending the event into the if statement, as sending an event without having found any bytes to be read is making no sense at all. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - slightly adapt spurious irq detection (Jan Beulich) V3: - fix spurious irq detection (Jan Beulich) --- include/xen/events.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/xen/events.h b/include/xen/events.h index c204262d9fc2..344081e71584 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -17,6 +17,7 @@ struct xenbus_device; unsigned xen_evtchn_nr_channels(void); int bind_evtchn_to_irq(evtchn_port_t evtchn); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, -- cgit v1.2.3 From dfd0743f1d9ea76931510ed150334d571fbab49d Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 9 Dec 2021 15:59:37 +0100 Subject: tee: handle lookup of shm with reference count 0 Since the tee subsystem does not keep a strong reference to its idle shared memory buffers, it races with other threads that try to destroy a shared memory through a close of its dma-buf fd or by unmapping the memory. In tee_shm_get_from_id() when a lookup in teedev->idr has been successful, it is possible that the tee_shm is in the dma-buf teardown path, but that path is blocked by the teedev mutex. Since we don't have an API to tell if the tee_shm is in the dma-buf teardown path or not we must find another way of detecting this condition. Fix this by doing the reference counting directly on the tee_shm using a new refcount_t refcount field. dma-buf is replaced by using anon_inode_getfd() instead, this separates the life-cycle of the underlying file from the tee_shm. tee_shm_put() is updated to hold the mutex when decreasing the refcount to 0 and then remove the tee_shm from teedev->idr before releasing the mutex. This means that the tee_shm can never be found unless it has a refcount larger than 0. Fixes: 967c9cca2cc5 ("tee: generic TEE subsystem") Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Reviewed-by: Lars Persson Reviewed-by: Sumit Garg Reported-by: Patrik Lantz Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index a1f03461369b..cf5999626e28 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -195,7 +195,7 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, * @offset: offset of buffer in user space * @pages: locked pages from userspace * @num_pages: number of locked pages - * @dmabuf: dmabuf used to for exporting to user space + * @refcount: reference counter * @flags: defined by TEE_SHM_* in tee_drv.h * @id: unique id of a shared memory object on this device, shared * with user space @@ -214,7 +214,7 @@ struct tee_shm { unsigned int offset; struct page **pages; size_t num_pages; - struct dma_buf *dmabuf; + refcount_t refcount; u32 flags; int id; u64 sec_world_id; -- cgit v1.2.3 From 877691b987a089938d67de13d886932ef2f21b22 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Sun, 12 Dec 2021 15:24:06 +0900 Subject: drm/format-helper: Add drm_fb_xrgb8888_to_xrgb2101010_toio() Add XRGB8888 emulation support for devices that can only do XRGB2101010. This is chiefly useful for simpledrm on Apple devices where the bootloader-provided framebuffer is 10-bit. Signed-off-by: Hector Martin Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20211212062407.138309-3-marcan@marcan.st --- include/drm/drm_format_helper.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index 97e4c3223af3..b30ed5de0a33 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -33,6 +33,9 @@ void drm_fb_xrgb8888_to_rgb888(void *dst, unsigned int dst_pitch, const void *sr void drm_fb_xrgb8888_to_rgb888_toio(void __iomem *dst, unsigned int dst_pitch, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip); +void drm_fb_xrgb8888_to_xrgb2101010_toio(void __iomem *dst, unsigned int dst_pitch, + const void *vaddr, const struct drm_framebuffer *fb, + const struct drm_rect *clip); void drm_fb_xrgb8888_to_gray8(void *dst, unsigned int dst_pitch, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip); -- cgit v1.2.3 From 006ea1b5822f9019bd722ffc6242bc0880879e3d Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Wed, 15 Dec 2021 10:17:37 +0100 Subject: drm/fourcc: Add packed 10bit YUV 4:2:0 format Adds a format that is 3 10bit YUV 4:2:0 samples packed into a 32bit word (with 2 spare bits). Supported on Broadcom BCM2711 chips. Signed-off-by: Dave Stevenson Signed-off-by: Maxime Ripard Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20211215091739.135042-2-maxime@cerno.tech --- include/uapi/drm/drm_fourcc.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 7f652c96845b..fc0c1454d275 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -314,6 +314,13 @@ extern "C" { */ #define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */ +/* 2 plane YCbCr420. + * 3 10 bit components and 2 padding bits packed into 4 bytes. + * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian + * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian + */ +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ + /* 3 plane non-subsampled (444) YCbCr * 16 bits per component, but only 10 bits are used and 6 bits are padded * index 0: Y plane, [15:0] Y:x [10:6] little endian @@ -854,6 +861,10 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) * and UV. Some SAND-using hardware stores UV in a separate tiled * image from Y to reduce the column height, which is not supported * with these modifiers. + * + * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also + * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes + * wide, but as this is a 10 bpp format that translates to 96 pixels. */ #define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \ -- cgit v1.2.3 From cb1c4aba055f928ffae0c868e8dfe08eeab302e7 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 6 Dec 2021 14:47:46 +0530 Subject: perf: Add new macros for mem_hops field Add new macros for mem_hops field which can be used to represent remote-node, socket and board level details. Currently the code had macro for HOPS_0, which corresponds to data coming from another core but same node. Add new macros for HOPS_1 to HOPS_3 to represent remote-node, socket and board level data. For ex: Encodings for mem_hops fields with L2 cache: L2 - local L2 L2 | REMOTE | HOPS_0 - remote core, same node L2 L2 | REMOTE | HOPS_1 - remote node, same socket L2 L2 | REMOTE | HOPS_2 - remote socket, same board L2 L2 | REMOTE | HOPS_3 - remote board L2 Signed-off-by: Kajol Jain Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211206091749.87585-2-kjain@linux.ibm.com --- include/uapi/linux/perf_event.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index bd8860eeb291..1b65042ab1db 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1332,7 +1332,10 @@ union perf_mem_data_src { /* hop level */ #define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -/* 2-7 available */ +#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ +#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ +#define PERF_MEM_HOPS_3 0x04 /* remote board */ +/* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ -- cgit v1.2.3 From d1e86325af377129adb7fc6f34eb044ca6068b47 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Dec 2021 15:34:15 +0000 Subject: net: phylink: add mac_select_pcs() method to phylink_mac_ops mac_select_pcs() allows us to have an explicit point to query which PCS the MAC wishes to use for a particular PHY interface mode, thereby allowing us to add support to validate the link settings with the PCS. Phylink will also use this to select the PCS to be used during a major configuration event without the MAC driver needing to call phylink_set_pcs(). Note that if mac_select_pcs() is present, the supported_interfaces bitmap must be filled in; this avoids mac_select_pcs() being called with PHY_INTERFACE_MODE_NA when we want to get support for all interface types. Phylink will return an error in phylink_create() unless this condition is satisfied. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/phylink.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index a2f266cc3442..b3086dcafeaf 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -112,6 +112,7 @@ struct phylink_config { /** * struct phylink_mac_ops - MAC operations structure. * @validate: Validate and update the link configuration. + * @mac_select_pcs: Select a PCS for the interface mode. * @mac_pcs_get_state: Read the current link state from the hardware. * @mac_prepare: prepare for a major reconfiguration of the interface. * @mac_config: configure the MAC for the selected mode and state. @@ -126,6 +127,8 @@ struct phylink_mac_ops { void (*validate)(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); + struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config, + phy_interface_t interface); void (*mac_pcs_get_state)(struct phylink_config *config, struct phylink_link_state *state); int (*mac_prepare)(struct phylink_config *config, unsigned int mode, @@ -178,6 +181,21 @@ struct phylink_mac_ops { */ void validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); +/** + * mac_select_pcs: Select a PCS for the interface mode. + * @config: a pointer to a &struct phylink_config. + * @interface: PHY interface mode for PCS + * + * Return the &struct phylink_pcs for the specified interface mode, or + * NULL if none is required, or an error pointer on error. + * + * This must not modify any state. It is used to query which PCS should + * be used. Phylink will use this during validation to ensure that the + * configuration is valid, and when setting a configuration to internally + * set the PCS that will be used. + */ +struct phylink_pcs *mac_select_pcs(struct phylink_config *config, + phy_interface_t interface); /** * mac_pcs_get_state() - Read the current inband link state from the hardware -- cgit v1.2.3 From 0d22d4b626a4eaa3196019092eb6c1919e9f8caa Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Dec 2021 15:34:20 +0000 Subject: net: phylink: add pcs_validate() method Add a hook for PCS to validate the link parameters. This avoids MAC drivers having to have knowledge of their PCS in their validate() method, thereby allowing several MAC drivers to be simplfied. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/phylink.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index b3086dcafeaf..713a0c928b7c 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -416,6 +416,7 @@ struct phylink_pcs { /** * struct phylink_pcs_ops - MAC PCS operations structure. + * @pcs_validate: validate the link configuration. * @pcs_get_state: read the current MAC PCS link state from the hardware. * @pcs_config: configure the MAC PCS for the selected mode and state. * @pcs_an_restart: restart 802.3z BaseX autonegotiation. @@ -423,6 +424,8 @@ struct phylink_pcs { * (where necessary). */ struct phylink_pcs_ops { + int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, + const struct phylink_link_state *state); void (*pcs_get_state)(struct phylink_pcs *pcs, struct phylink_link_state *state); int (*pcs_config)(struct phylink_pcs *pcs, unsigned int mode, @@ -435,6 +438,23 @@ struct phylink_pcs_ops { }; #if 0 /* For kernel-doc purposes only. */ +/** + * pcs_validate() - validate the link configuration. + * @pcs: a pointer to a &struct phylink_pcs. + * @supported: ethtool bitmask for supported link modes. + * @state: a const pointer to a &struct phylink_link_state. + * + * Validate the interface mode, and advertising's autoneg bit, removing any + * media ethtool link modes that would not be supportable from the supported + * mask. Phylink will propagate the changes to the advertising mask. See the + * &struct phylink_mac_ops validate() method. + * + * Returns -EINVAL if the interface mode/autoneg mode is not supported. + * Returns non-zero positive if the link state can be supported. + */ +int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, + const struct phylink_link_state *state); + /** * pcs_get_state() - Read the current inband link state from the hardware * @pcs: a pointer to a &struct phylink_pcs. -- cgit v1.2.3 From 9131c6331726514f0d0364c41f8733cc5eec11ab Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 1 Dec 2021 02:23:09 +0300 Subject: soc/tegra: Add devm_tegra_core_dev_init_opp_table_common() Only couple drivers need to get the -ENODEV error code and majority of drivers need to explicitly initialize the performance state. Add new common helper which sets up OPP table for these drivers. Reviewed-by: Ulf Hansson Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- include/soc/tegra/common.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/soc/tegra/common.h b/include/soc/tegra/common.h index af41ad80ec21..8ec1ac07fc85 100644 --- a/include/soc/tegra/common.h +++ b/include/soc/tegra/common.h @@ -39,4 +39,19 @@ devm_tegra_core_dev_init_opp_table(struct device *dev, } #endif +static inline int +devm_tegra_core_dev_init_opp_table_common(struct device *dev) +{ + struct tegra_core_opp_params opp_params = {}; + int err; + + opp_params.init_state = true; + + err = devm_tegra_core_dev_init_opp_table(dev, &opp_params); + if (err != -ENODEV) + return err; + + return 0; +} + #endif /* __SOC_TEGRA_COMMON_H__ */ -- cgit v1.2.3 From c6aeaf56f468a565f6d2f27325fc07d35cdcd3cb Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 9 Sep 2021 15:51:24 +0200 Subject: drm/tegra: Implement correct DMA-BUF semantics DMA-BUF requires that each device that accesses a DMA-BUF attaches to it separately. To do so the host1x_bo_pin() and host1x_bo_unpin() functions need to be reimplemented so that they can return a mapping, which either represents an attachment or a map of the driver's own GEM object. Signed-off-by: Thierry Reding --- include/linux/host1x.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 7bccf589aba7..157326074df8 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -7,6 +7,7 @@ #define __LINUX_HOST1X_H #include +#include #include enum host1x_class { @@ -82,12 +83,23 @@ struct host1x_client { struct host1x_bo; struct sg_table; +struct host1x_bo_mapping { + struct dma_buf_attachment *attach; + enum dma_data_direction direction; + struct host1x_bo *bo; + struct sg_table *sgt; + unsigned int chunks; + struct device *dev; + dma_addr_t phys; + size_t size; +}; + struct host1x_bo_ops { struct host1x_bo *(*get)(struct host1x_bo *bo); void (*put)(struct host1x_bo *bo); - struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo, - dma_addr_t *phys); - void (*unpin)(struct device *dev, struct sg_table *sgt); + struct host1x_bo_mapping *(*pin)(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir); + void (*unpin)(struct host1x_bo_mapping *map); void *(*mmap)(struct host1x_bo *bo); void (*munmap)(struct host1x_bo *bo, void *addr); }; @@ -112,17 +124,15 @@ static inline void host1x_bo_put(struct host1x_bo *bo) bo->ops->put(bo); } -static inline struct sg_table *host1x_bo_pin(struct device *dev, - struct host1x_bo *bo, - dma_addr_t *phys) +static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir) { - return bo->ops->pin(dev, bo, phys); + return bo->ops->pin(dev, bo, dir); } -static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo, - struct sg_table *sgt) +static inline void host1x_bo_unpin(struct host1x_bo_mapping *map) { - bo->ops->unpin(dev, sgt); + map->bo->ops->unpin(map); } static inline void *host1x_bo_mmap(struct host1x_bo *bo) -- cgit v1.2.3 From 1f39b1dfa53c84b56d7ad37fed44afda7004959d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 7 Feb 2020 16:50:52 +0100 Subject: drm/tegra: Implement buffer object cache This cache is used to avoid mapping and unmapping buffer objects unnecessarily. Mappings are cached per client and stay hot until the buffer object is destroyed. Signed-off-by: Thierry Reding --- include/linux/host1x.h | 53 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 157326074df8..d0bb16cdd005 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -8,6 +8,7 @@ #include #include +#include #include enum host1x_class { @@ -24,6 +25,28 @@ struct iommu_group; u64 host1x_get_dma_mask(struct host1x *host1x); +/** + * struct host1x_bo_cache - host1x buffer object cache + * @mappings: list of mappings + * @lock: synchronizes accesses to the list of mappings + */ +struct host1x_bo_cache { + struct list_head mappings; + struct mutex lock; +}; + +static inline void host1x_bo_cache_init(struct host1x_bo_cache *cache) +{ + INIT_LIST_HEAD(&cache->mappings); + mutex_init(&cache->lock); +} + +static inline void host1x_bo_cache_destroy(struct host1x_bo_cache *cache) +{ + /* XXX warn if not empty? */ + mutex_destroy(&cache->lock); +} + /** * struct host1x_client_ops - host1x client operations * @early_init: host1x client early initialization code @@ -74,6 +97,8 @@ struct host1x_client { struct host1x_client *parent; unsigned int usecount; struct mutex lock; + + struct host1x_bo_cache cache; }; /* @@ -84,16 +109,26 @@ struct host1x_bo; struct sg_table; struct host1x_bo_mapping { + struct kref ref; struct dma_buf_attachment *attach; enum dma_data_direction direction; + struct list_head list; struct host1x_bo *bo; struct sg_table *sgt; unsigned int chunks; struct device *dev; dma_addr_t phys; size_t size; + + struct host1x_bo_cache *cache; + struct list_head entry; }; +static inline struct host1x_bo_mapping *to_host1x_bo_mapping(struct kref *ref) +{ + return container_of(ref, struct host1x_bo_mapping, ref); +} + struct host1x_bo_ops { struct host1x_bo *(*get)(struct host1x_bo *bo); void (*put)(struct host1x_bo *bo); @@ -106,11 +141,15 @@ struct host1x_bo_ops { struct host1x_bo { const struct host1x_bo_ops *ops; + struct list_head mappings; + spinlock_t lock; }; static inline void host1x_bo_init(struct host1x_bo *bo, const struct host1x_bo_ops *ops) { + INIT_LIST_HEAD(&bo->mappings); + spin_lock_init(&bo->lock); bo->ops = ops; } @@ -124,16 +163,10 @@ static inline void host1x_bo_put(struct host1x_bo *bo) bo->ops->put(bo); } -static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, - enum dma_data_direction dir) -{ - return bo->ops->pin(dev, bo, dir); -} - -static inline void host1x_bo_unpin(struct host1x_bo_mapping *map) -{ - map->bo->ops->unpin(map); -} +struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir, + struct host1x_bo_cache *cache); +void host1x_bo_unpin(struct host1x_bo_mapping *map); static inline void *host1x_bo_mmap(struct host1x_bo *bo) { -- cgit v1.2.3 From 46f226c93d35b936aeec6eb31da932dc2e86f413 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 16 Sep 2021 17:55:17 +0300 Subject: drm/tegra: Add NVDEC driver Add support for booting and using NVDEC on Tegra210, Tegra186 and Tegra194 to the Host1x and TegraDRM drivers. Booting in secure mode is not currently supported. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index d0bb16cdd005..2ca53d7ed7ca 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -17,6 +17,8 @@ enum host1x_class { HOST1X_CLASS_GR2D_SB = 0x52, HOST1X_CLASS_VIC = 0x5D, HOST1X_CLASS_GR3D = 0x60, + HOST1X_CLASS_NVDEC = 0xF0, + HOST1X_CLASS_NVDEC1 = 0xF5, }; struct host1x; -- cgit v1.2.3 From 9ca790f44606109071ab1a3a37ed99e91794c37c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 1 Dec 2021 02:23:16 +0300 Subject: gpu: host1x: Add host1x_channel_stop() Add host1x_channel_stop() which waits till channel becomes idle and then stops the channel hardware. This is needed for supporting suspend/resume by host1x drivers since the hardware state is lost after power-gating, thus the channel needs to be stopped before client enters into suspend. Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- include/linux/host1x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 2ca53d7ed7ca..e8dc5bc41f79 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -226,6 +226,7 @@ struct host1x_job; struct host1x_channel *host1x_channel_request(struct host1x_client *client); struct host1x_channel *host1x_channel_get(struct host1x_channel *channel); +void host1x_channel_stop(struct host1x_channel *channel); void host1x_channel_put(struct host1x_channel *channel); int host1x_job_submit(struct host1x_job *job); -- cgit v1.2.3 From c0cdc89072a3e1ae3981437f385de14b7bba8fd8 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 27 Oct 2021 16:15:04 +0100 Subject: irqchip/gic-v3-its: Give the percpu rdist struct its own flags field Later patches will require tracking some per-rdist status. Reuse the bytes "lost" to padding within the __percpu rdist struct as a flags field, and re-encode ->lpi_enabled within said flags. No change in functionality intended. Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211027151506.2085066-2-valentin.schneider@arm.com --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 81cbf85f73de..0dc34d7d735a 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -615,7 +615,7 @@ struct rdists { void __iomem *rd_base; struct page *pend_page; phys_addr_t phys_base; - bool lpi_enabled; + u64 flags; cpumask_t *vpe_table_mask; void *vpe_l1_base; } __percpu *rdist; -- cgit v1.2.3 From d23bc2bc1d634658d7fa96395419c1c553a784f0 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 27 Oct 2021 16:15:05 +0100 Subject: irqchip/gic-v3-its: Postpone LPI pending table freeing and memreserve Memory used by the LPI tables have to be made persistent for kexec to have a chance to work, as explained in [1]. If they have been made persistent and we are booting into a kexec'd kernel, we also need to free the pages that were preemptively allocated by the new kernel for those tables. Both of those operations currently happen during its_cpu_init(), which happens in a _STARTING (IOW atomic) cpuhp callback for secondary CPUs. efi_mem_reserve_iomem() issues a GFP_ATOMIC allocation, which unfortunately doesn't work under PREEMPT_RT (this ends up grabbing a non-raw spinlock, which can sleep under PREEMPT_RT). Similarly, freeing the pages ends up grabbing a sleepable spinlock. Since the memreserve is only required by kexec, it doesn't have to be done so early in the secondary boot process. Issue the reservation in a new CPUHP_AP_ONLINE_DYN cpuhp callback, and piggy-back the page freeing on top of it. A CPU gets to run the body of this new callback exactly once. As kexec issues a machine_shutdown() prior to machine_kexec(), it will be serialized vs a CPU being plugged to life by the hotplug machinery - either the CPU will have been brought up and have had its redistributor's pending table memreserved, or it never went online and will have its table allocated by the new kernel. [1]: https://lore.kernel.org/lkml/20180921195954.21574-1-marc.zyngier@arm.com/ Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211027151506.2085066-3-valentin.schneider@arm.com --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 0dc34d7d735a..51b85506ae90 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -632,6 +632,7 @@ struct rdists { struct irq_domain; struct fwnode_handle; +int __init its_lpi_memreserve_init(void); int its_cpu_init(void); int its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *domain); -- cgit v1.2.3 From 835f442fdbce33a47a6bde356643fd7e3ef7ec1b Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 27 Oct 2021 16:15:06 +0100 Subject: irqchip/gic-v3-its: Limit memreserve cpuhp state lifetime The new memreserve cpuhp callback only needs to survive up until a point where every CPU in the system has booted once. Beyond that, it becomes a no-op and can be put in the bin. Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211027151506.2085066-4-valentin.schneider@arm.com --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 51b85506ae90..12d91f0dedf9 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -624,6 +624,7 @@ struct rdists { u64 flags; u32 gicd_typer; u32 gicd_typer2; + int cpuhp_memreserve_state; bool has_vlpis; bool has_rvpeid; bool has_direct_lpi; -- cgit v1.2.3 From 92e1bcee067f1722703bf61047ef674be1b6d1c0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 16 Dec 2021 13:05:06 +0100 Subject: fib: rules: remove duplicated nla policies The attributes are identical in all implementations so move the ipv4 one into the core and remove the per-family nla policies. Signed-off-by: Florian Westphal Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/fib_rules.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index bd07484ab9dd..4183204915dc 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -91,7 +91,6 @@ struct fib_rules_ops { void (*flush_cache)(struct fib_rules_ops *ops); int nlgroup; - const struct nla_policy *policy; struct list_head rules_list; struct module *owner; struct net *fro_net; -- cgit v1.2.3 From 66495f301c69288a32b9d4bdb60c20ef42d90e6e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 16 Dec 2021 13:05:07 +0100 Subject: fib: expand fib_rule_policy Now that there is only one fib nla_policy there is no need to keep the macro around. Place it where its used. Signed-off-by: Florian Westphal Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/fib_rules.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 4183204915dc..82da359bca03 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -102,26 +102,6 @@ struct fib_rule_notifier_info { struct fib_rule *rule; }; -#define FRA_GENERIC_POLICY \ - [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, \ - [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ - [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ - [FRA_PRIORITY] = { .type = NLA_U32 }, \ - [FRA_FWMARK] = { .type = NLA_U32 }, \ - [FRA_TUN_ID] = { .type = NLA_U64 }, \ - [FRA_FWMASK] = { .type = NLA_U32 }, \ - [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ - [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 }, \ - [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ - [FRA_PROTOCOL] = { .type = NLA_U8 }, \ - [FRA_IP_PROTO] = { .type = NLA_U8 }, \ - [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \ - [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } - - static inline void fib_rule_get(struct fib_rule *rule) { refcount_inc(&rule->refcnt); -- cgit v1.2.3 From fc5e0e37621973758e7e49dd85f28673e72e4b85 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 12 Nov 2021 13:35:34 +0100 Subject: dt-bindings: Update headers for Tegra234 Add a few more clocks that will be used in follow-up patches to enable more functionality on Tegra234. Signed-off-by: Mikko Perttunen Reviewed-by: Rob Herring Signed-off-by: Thierry Reding --- include/dt-bindings/clock/tegra234-clock.h | 17 ++++++++++++++--- include/dt-bindings/reset/tegra234-reset.h | 12 ++++++++++-- 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h index 2c82072950ee..21ed0c732fb9 100644 --- a/include/dt-bindings/clock/tegra234-clock.h +++ b/include/dt-bindings/clock/tegra234-clock.h @@ -4,11 +4,22 @@ #ifndef DT_BINDINGS_CLOCK_TEGRA234_CLOCK_H #define DT_BINDINGS_CLOCK_TEGRA234_CLOCK_H +/** + * @file + * @defgroup bpmp_clock_ids Clock ID's + * @{ + */ /** @brief output of gate CLK_ENB_FUSE */ -#define TEGRA234_CLK_FUSE 40 +#define TEGRA234_CLK_FUSE 40U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC4 */ -#define TEGRA234_CLK_SDMMC4 123 +#define TEGRA234_CLK_SDMMC4 123U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */ -#define TEGRA234_CLK_UARTA 155 +#define TEGRA234_CLK_UARTA 155U +/** @brief CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC_LEGACY_TM switch divider output */ +#define TEGRA234_CLK_SDMMC_LEGACY_TM 219U +/** @brief PLL controlled by CLK_RST_CONTROLLER_PLLC4_BASE */ +#define TEGRA234_CLK_PLLC4 237U +/** @brief 32K input clock provided by PMIC */ +#define TEGRA234_CLK_CLK_32K 289U #endif diff --git a/include/dt-bindings/reset/tegra234-reset.h b/include/dt-bindings/reset/tegra234-reset.h index b3c63be06d2d..50e13bced642 100644 --- a/include/dt-bindings/reset/tegra234-reset.h +++ b/include/dt-bindings/reset/tegra234-reset.h @@ -4,7 +4,15 @@ #ifndef DT_BINDINGS_RESET_TEGRA234_RESET_H #define DT_BINDINGS_RESET_TEGRA234_RESET_H -#define TEGRA234_RESET_SDMMC4 85 -#define TEGRA234_RESET_UARTA 100 +/** + * @file + * @defgroup bpmp_reset_ids Reset ID's + * @brief Identifiers for Resets controllable by firmware + * @{ + */ +#define TEGRA234_RESET_SDMMC4 85U +#define TEGRA234_RESET_UARTA 100U + +/** @} */ #endif -- cgit v1.2.3 From c3859c1436e31461f27365ec6fc751c9bbeff3e2 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 13 Dec 2021 17:21:47 +0100 Subject: dt-bindings: memory: tegra: Add Tegra234 support Document the variant of the memory controller and external memory controllers found on Tegra234 and add some memory client and SMMU stream ID definitions for use in device tree files. Reviewed-by: Rob Herring Signed-off-by: Thierry Reding Reviewed-by: Krzysztof Kozlowski Signed-off-by: Thierry Reding --- include/dt-bindings/clock/tegra234-clock.h | 9 +++++++++ include/dt-bindings/memory/tegra234-mc.h | 32 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 include/dt-bindings/memory/tegra234-mc.h (limited to 'include') diff --git a/include/dt-bindings/clock/tegra234-clock.h b/include/dt-bindings/clock/tegra234-clock.h index 21ed0c732fb9..8d7e66e1b6ef 100644 --- a/include/dt-bindings/clock/tegra234-clock.h +++ b/include/dt-bindings/clock/tegra234-clock.h @@ -9,6 +9,15 @@ * @defgroup bpmp_clock_ids Clock ID's * @{ */ +/** + * @brief controls the EMC clock frequency. + * @details Doing a clk_set_rate on this clock will select the + * appropriate clock source, program the source rate and execute a + * specific sequence to switch to the new clock source for both memory + * controllers. This can be used to control the balance between memory + * throughput and memory controller power. + */ +#define TEGRA234_CLK_EMC 31U /** @brief output of gate CLK_ENB_FUSE */ #define TEGRA234_CLK_FUSE 40U /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_SDMMC4 */ diff --git a/include/dt-bindings/memory/tegra234-mc.h b/include/dt-bindings/memory/tegra234-mc.h new file mode 100644 index 000000000000..2662f70c15c6 --- /dev/null +++ b/include/dt-bindings/memory/tegra234-mc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ + +#ifndef DT_BINDINGS_MEMORY_TEGRA234_MC_H +#define DT_BINDINGS_MEMORY_TEGRA234_MC_H + +/* special clients */ +#define TEGRA234_SID_INVALID 0x00 +#define TEGRA234_SID_PASSTHROUGH 0x7f + + +/* NISO1 stream IDs */ +#define TEGRA234_SID_SDMMC4 0x02 +#define TEGRA234_SID_BPMP 0x10 + +/* + * memory client IDs + */ + +/* sdmmcd memory read client */ +#define TEGRA234_MEMORY_CLIENT_SDMMCRAB 0x63 +/* sdmmcd memory write client */ +#define TEGRA234_MEMORY_CLIENT_SDMMCWAB 0x67 +/* BPMP read client */ +#define TEGRA234_MEMORY_CLIENT_BPMPR 0x93 +/* BPMP write client */ +#define TEGRA234_MEMORY_CLIENT_BPMPW 0x94 +/* BPMPDMA read client */ +#define TEGRA234_MEMORY_CLIENT_BPMPDMAR 0x95 +/* BPMPDMA write client */ +#define TEGRA234_MEMORY_CLIENT_BPMPDMAW 0x96 + +#endif -- cgit v1.2.3 From 3c67d44de787dff288d7f2a51c372b22f7356db6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Dec 2021 06:48:53 -0700 Subject: block: add mq_ops->queue_rqs hook If we have a list of requests in our plug list, send it to the driver in one go, if possible. The driver must set mq_ops->queue_rqs() to support this, if not the usual one-by-one path is used. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 772f8f921526..550996cf419c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -492,6 +492,14 @@ struct blk_mq_ops { */ void (*commit_rqs)(struct blk_mq_hw_ctx *); + /** + * @queue_rqs: Queue a list of new requests. Driver is guaranteed + * that each request belongs to the same queue. If the driver doesn't + * empty the @rqlist completely, then the rest will be queued + * individually by the block layer upon return. + */ + void (*queue_rqs)(struct request **rqlist); + /** * @get_budget: Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the -- cgit v1.2.3 From 38bb8a7264daf0ff5bb3024ae94bc465de78203d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 25 Jun 2021 11:29:51 +0200 Subject: dt-bindings: clock: starfive: Add JH7100 clock definitions Add all clock outputs for the StarFive JH7100 clock generator. Based on work by Ahmad Fatoum for Barebox, with "JH7100_" prefixes added to all definitions. Acked-by: Rob Herring Acked-by: Stephen Boyd Signed-off-by: Geert Uytterhoeven Signed-off-by: Emil Renner Berthing --- include/dt-bindings/clock/starfive-jh7100.h | 202 ++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 include/dt-bindings/clock/starfive-jh7100.h (limited to 'include') diff --git a/include/dt-bindings/clock/starfive-jh7100.h b/include/dt-bindings/clock/starfive-jh7100.h new file mode 100644 index 000000000000..aa0863b9728d --- /dev/null +++ b/include/dt-bindings/clock/starfive-jh7100.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2021 Ahmad Fatoum, Pengutronix + */ + +#ifndef __DT_BINDINGS_CLOCK_STARFIVE_JH7100_H__ +#define __DT_BINDINGS_CLOCK_STARFIVE_JH7100_H__ + +#define JH7100_CLK_CPUNDBUS_ROOT 0 +#define JH7100_CLK_DLA_ROOT 1 +#define JH7100_CLK_DSP_ROOT 2 +#define JH7100_CLK_GMACUSB_ROOT 3 +#define JH7100_CLK_PERH0_ROOT 4 +#define JH7100_CLK_PERH1_ROOT 5 +#define JH7100_CLK_VIN_ROOT 6 +#define JH7100_CLK_VOUT_ROOT 7 +#define JH7100_CLK_AUDIO_ROOT 8 +#define JH7100_CLK_CDECHIFI4_ROOT 9 +#define JH7100_CLK_CDEC_ROOT 10 +#define JH7100_CLK_VOUTBUS_ROOT 11 +#define JH7100_CLK_CPUNBUS_ROOT_DIV 12 +#define JH7100_CLK_DSP_ROOT_DIV 13 +#define JH7100_CLK_PERH0_SRC 14 +#define JH7100_CLK_PERH1_SRC 15 +#define JH7100_CLK_PLL0_TESTOUT 16 +#define JH7100_CLK_PLL1_TESTOUT 17 +#define JH7100_CLK_PLL2_TESTOUT 18 +#define JH7100_CLK_PLL2_REF 19 +#define JH7100_CLK_CPU_CORE 20 +#define JH7100_CLK_CPU_AXI 21 +#define JH7100_CLK_AHB_BUS 22 +#define JH7100_CLK_APB1_BUS 23 +#define JH7100_CLK_APB2_BUS 24 +#define JH7100_CLK_DOM3AHB_BUS 25 +#define JH7100_CLK_DOM7AHB_BUS 26 +#define JH7100_CLK_U74_CORE0 27 +#define JH7100_CLK_U74_CORE1 28 +#define JH7100_CLK_U74_AXI 29 +#define JH7100_CLK_U74RTC_TOGGLE 30 +#define JH7100_CLK_SGDMA2P_AXI 31 +#define JH7100_CLK_DMA2PNOC_AXI 32 +#define JH7100_CLK_SGDMA2P_AHB 33 +#define JH7100_CLK_DLA_BUS 34 +#define JH7100_CLK_DLA_AXI 35 +#define JH7100_CLK_DLANOC_AXI 36 +#define JH7100_CLK_DLA_APB 37 +#define JH7100_CLK_VP6_CORE 38 +#define JH7100_CLK_VP6BUS_SRC 39 +#define JH7100_CLK_VP6_AXI 40 +#define JH7100_CLK_VCDECBUS_SRC 41 +#define JH7100_CLK_VDEC_BUS 42 +#define JH7100_CLK_VDEC_AXI 43 +#define JH7100_CLK_VDECBRG_MAIN 44 +#define JH7100_CLK_VDEC_BCLK 45 +#define JH7100_CLK_VDEC_CCLK 46 +#define JH7100_CLK_VDEC_APB 47 +#define JH7100_CLK_JPEG_AXI 48 +#define JH7100_CLK_JPEG_CCLK 49 +#define JH7100_CLK_JPEG_APB 50 +#define JH7100_CLK_GC300_2X 51 +#define JH7100_CLK_GC300_AHB 52 +#define JH7100_CLK_JPCGC300_AXIBUS 53 +#define JH7100_CLK_GC300_AXI 54 +#define JH7100_CLK_JPCGC300_MAIN 55 +#define JH7100_CLK_VENC_BUS 56 +#define JH7100_CLK_VENC_AXI 57 +#define JH7100_CLK_VENCBRG_MAIN 58 +#define JH7100_CLK_VENC_BCLK 59 +#define JH7100_CLK_VENC_CCLK 60 +#define JH7100_CLK_VENC_APB 61 +#define JH7100_CLK_DDRPLL_DIV2 62 +#define JH7100_CLK_DDRPLL_DIV4 63 +#define JH7100_CLK_DDRPLL_DIV8 64 +#define JH7100_CLK_DDROSC_DIV2 65 +#define JH7100_CLK_DDRC0 66 +#define JH7100_CLK_DDRC1 67 +#define JH7100_CLK_DDRPHY_APB 68 +#define JH7100_CLK_NOC_ROB 69 +#define JH7100_CLK_NOC_COG 70 +#define JH7100_CLK_NNE_AHB 71 +#define JH7100_CLK_NNEBUS_SRC1 72 +#define JH7100_CLK_NNE_BUS 73 +#define JH7100_CLK_NNE_AXI 74 +#define JH7100_CLK_NNENOC_AXI 75 +#define JH7100_CLK_DLASLV_AXI 76 +#define JH7100_CLK_DSPX2C_AXI 77 +#define JH7100_CLK_HIFI4_SRC 78 +#define JH7100_CLK_HIFI4_COREFREE 79 +#define JH7100_CLK_HIFI4_CORE 80 +#define JH7100_CLK_HIFI4_BUS 81 +#define JH7100_CLK_HIFI4_AXI 82 +#define JH7100_CLK_HIFI4NOC_AXI 83 +#define JH7100_CLK_SGDMA1P_BUS 84 +#define JH7100_CLK_SGDMA1P_AXI 85 +#define JH7100_CLK_DMA1P_AXI 86 +#define JH7100_CLK_X2C_AXI 87 +#define JH7100_CLK_USB_BUS 88 +#define JH7100_CLK_USB_AXI 89 +#define JH7100_CLK_USBNOC_AXI 90 +#define JH7100_CLK_USBPHY_ROOTDIV 91 +#define JH7100_CLK_USBPHY_125M 92 +#define JH7100_CLK_USBPHY_PLLDIV25M 93 +#define JH7100_CLK_USBPHY_25M 94 +#define JH7100_CLK_AUDIO_DIV 95 +#define JH7100_CLK_AUDIO_SRC 96 +#define JH7100_CLK_AUDIO_12288 97 +#define JH7100_CLK_VIN_SRC 98 +#define JH7100_CLK_ISP0_BUS 99 +#define JH7100_CLK_ISP0_AXI 100 +#define JH7100_CLK_ISP0NOC_AXI 101 +#define JH7100_CLK_ISPSLV_AXI 102 +#define JH7100_CLK_ISP1_BUS 103 +#define JH7100_CLK_ISP1_AXI 104 +#define JH7100_CLK_ISP1NOC_AXI 105 +#define JH7100_CLK_VIN_BUS 106 +#define JH7100_CLK_VIN_AXI 107 +#define JH7100_CLK_VINNOC_AXI 108 +#define JH7100_CLK_VOUT_SRC 109 +#define JH7100_CLK_DISPBUS_SRC 110 +#define JH7100_CLK_DISP_BUS 111 +#define JH7100_CLK_DISP_AXI 112 +#define JH7100_CLK_DISPNOC_AXI 113 +#define JH7100_CLK_SDIO0_AHB 114 +#define JH7100_CLK_SDIO0_CCLKINT 115 +#define JH7100_CLK_SDIO0_CCLKINT_INV 116 +#define JH7100_CLK_SDIO1_AHB 117 +#define JH7100_CLK_SDIO1_CCLKINT 118 +#define JH7100_CLK_SDIO1_CCLKINT_INV 119 +#define JH7100_CLK_GMAC_AHB 120 +#define JH7100_CLK_GMAC_ROOT_DIV 121 +#define JH7100_CLK_GMAC_PTP_REF 122 +#define JH7100_CLK_GMAC_GTX 123 +#define JH7100_CLK_GMAC_RMII_TX 124 +#define JH7100_CLK_GMAC_RMII_RX 125 +#define JH7100_CLK_GMAC_TX 126 +#define JH7100_CLK_GMAC_TX_INV 127 +#define JH7100_CLK_GMAC_RX_PRE 128 +#define JH7100_CLK_GMAC_RX_INV 129 +#define JH7100_CLK_GMAC_RMII 130 +#define JH7100_CLK_GMAC_TOPHYREF 131 +#define JH7100_CLK_SPI2AHB_AHB 132 +#define JH7100_CLK_SPI2AHB_CORE 133 +#define JH7100_CLK_EZMASTER_AHB 134 +#define JH7100_CLK_E24_AHB 135 +#define JH7100_CLK_E24RTC_TOGGLE 136 +#define JH7100_CLK_QSPI_AHB 137 +#define JH7100_CLK_QSPI_APB 138 +#define JH7100_CLK_QSPI_REF 139 +#define JH7100_CLK_SEC_AHB 140 +#define JH7100_CLK_AES 141 +#define JH7100_CLK_SHA 142 +#define JH7100_CLK_PKA 143 +#define JH7100_CLK_TRNG_APB 144 +#define JH7100_CLK_OTP_APB 145 +#define JH7100_CLK_UART0_APB 146 +#define JH7100_CLK_UART0_CORE 147 +#define JH7100_CLK_UART1_APB 148 +#define JH7100_CLK_UART1_CORE 149 +#define JH7100_CLK_SPI0_APB 150 +#define JH7100_CLK_SPI0_CORE 151 +#define JH7100_CLK_SPI1_APB 152 +#define JH7100_CLK_SPI1_CORE 153 +#define JH7100_CLK_I2C0_APB 154 +#define JH7100_CLK_I2C0_CORE 155 +#define JH7100_CLK_I2C1_APB 156 +#define JH7100_CLK_I2C1_CORE 157 +#define JH7100_CLK_GPIO_APB 158 +#define JH7100_CLK_UART2_APB 159 +#define JH7100_CLK_UART2_CORE 160 +#define JH7100_CLK_UART3_APB 161 +#define JH7100_CLK_UART3_CORE 162 +#define JH7100_CLK_SPI2_APB 163 +#define JH7100_CLK_SPI2_CORE 164 +#define JH7100_CLK_SPI3_APB 165 +#define JH7100_CLK_SPI3_CORE 166 +#define JH7100_CLK_I2C2_APB 167 +#define JH7100_CLK_I2C2_CORE 168 +#define JH7100_CLK_I2C3_APB 169 +#define JH7100_CLK_I2C3_CORE 170 +#define JH7100_CLK_WDTIMER_APB 171 +#define JH7100_CLK_WDT_CORE 172 +#define JH7100_CLK_TIMER0_CORE 173 +#define JH7100_CLK_TIMER1_CORE 174 +#define JH7100_CLK_TIMER2_CORE 175 +#define JH7100_CLK_TIMER3_CORE 176 +#define JH7100_CLK_TIMER4_CORE 177 +#define JH7100_CLK_TIMER5_CORE 178 +#define JH7100_CLK_TIMER6_CORE 179 +#define JH7100_CLK_VP6INTC_APB 180 +#define JH7100_CLK_PWM_APB 181 +#define JH7100_CLK_MSI_APB 182 +#define JH7100_CLK_TEMP_APB 183 +#define JH7100_CLK_TEMP_SENSE 184 +#define JH7100_CLK_SYSERR_APB 185 + +#define JH7100_CLK_PLL0_OUT 186 +#define JH7100_CLK_PLL1_OUT 187 +#define JH7100_CLK_PLL2_OUT 188 + +#define JH7100_CLK_END 189 + +#endif /* __DT_BINDINGS_CLOCK_STARFIVE_JH7100_H__ */ -- cgit v1.2.3 From 810e287e83b69ff8563bde15cae9120c802ac5d7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 25 Jun 2021 11:30:00 +0200 Subject: dt-bindings: reset: Add StarFive JH7100 reset definitions Add all resets for the StarFive JH7100 reset controller. Based on work by Ahmad Fatoum for Barebox, with "JH7100_" prefixes added to all definitions. Acked-by: Rob Herring Signed-off-by: Geert Uytterhoeven Signed-off-by: Emil Renner Berthing --- include/dt-bindings/reset/starfive-jh7100.h | 126 ++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 include/dt-bindings/reset/starfive-jh7100.h (limited to 'include') diff --git a/include/dt-bindings/reset/starfive-jh7100.h b/include/dt-bindings/reset/starfive-jh7100.h new file mode 100644 index 000000000000..540e19254f39 --- /dev/null +++ b/include/dt-bindings/reset/starfive-jh7100.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2021 Ahmad Fatoum, Pengutronix + */ + +#ifndef __DT_BINDINGS_RESET_STARFIVE_JH7100_H__ +#define __DT_BINDINGS_RESET_STARFIVE_JH7100_H__ + +#define JH7100_RSTN_DOM3AHB_BUS 0 +#define JH7100_RSTN_DOM7AHB_BUS 1 +#define JH7100_RST_U74 2 +#define JH7100_RSTN_U74_AXI 3 +#define JH7100_RSTN_SGDMA2P_AHB 4 +#define JH7100_RSTN_SGDMA2P_AXI 5 +#define JH7100_RSTN_DMA2PNOC_AXI 6 +#define JH7100_RSTN_DLA_AXI 7 +#define JH7100_RSTN_DLANOC_AXI 8 +#define JH7100_RSTN_DLA_APB 9 +#define JH7100_RST_VP6_DRESET 10 +#define JH7100_RST_VP6_BRESET 11 +#define JH7100_RSTN_VP6_AXI 12 +#define JH7100_RSTN_VDECBRG_MAIN 13 +#define JH7100_RSTN_VDEC_AXI 14 +#define JH7100_RSTN_VDEC_BCLK 15 +#define JH7100_RSTN_VDEC_CCLK 16 +#define JH7100_RSTN_VDEC_APB 17 +#define JH7100_RSTN_JPEG_AXI 18 +#define JH7100_RSTN_JPEG_CCLK 19 +#define JH7100_RSTN_JPEG_APB 20 +#define JH7100_RSTN_JPCGC300_MAIN 21 +#define JH7100_RSTN_GC300_2X 22 +#define JH7100_RSTN_GC300_AXI 23 +#define JH7100_RSTN_GC300_AHB 24 +#define JH7100_RSTN_VENC_AXI 25 +#define JH7100_RSTN_VENCBRG_MAIN 26 +#define JH7100_RSTN_VENC_BCLK 27 +#define JH7100_RSTN_VENC_CCLK 28 +#define JH7100_RSTN_VENC_APB 29 +#define JH7100_RSTN_DDRPHY_APB 30 +#define JH7100_RSTN_NOC_ROB 31 +#define JH7100_RSTN_NOC_COG 32 +#define JH7100_RSTN_HIFI4_AXI 33 +#define JH7100_RSTN_HIFI4NOC_AXI 34 +#define JH7100_RST_HIFI4_DRESET 35 +#define JH7100_RST_HIFI4_BRESET 36 +#define JH7100_RSTN_USB_AXI 37 +#define JH7100_RSTN_USBNOC_AXI 38 +#define JH7100_RSTN_SGDMA1P_AXI 39 +#define JH7100_RSTN_DMA1P_AXI 40 +#define JH7100_RSTN_X2C_AXI 41 +#define JH7100_RSTN_NNE_AHB 42 +#define JH7100_RSTN_NNE_AXI 43 +#define JH7100_RSTN_NNENOC_AXI 44 +#define JH7100_RSTN_DLASLV_AXI 45 +#define JH7100_RSTN_DSPX2C_AXI 46 +#define JH7100_RSTN_VIN_SRC 47 +#define JH7100_RSTN_ISPSLV_AXI 48 +#define JH7100_RSTN_VIN_AXI 49 +#define JH7100_RSTN_VINNOC_AXI 50 +#define JH7100_RSTN_ISP0_AXI 51 +#define JH7100_RSTN_ISP0NOC_AXI 52 +#define JH7100_RSTN_ISP1_AXI 53 +#define JH7100_RSTN_ISP1NOC_AXI 54 +#define JH7100_RSTN_VOUT_SRC 55 +#define JH7100_RSTN_DISP_AXI 56 +#define JH7100_RSTN_DISPNOC_AXI 57 +#define JH7100_RSTN_SDIO0_AHB 58 +#define JH7100_RSTN_SDIO1_AHB 59 +#define JH7100_RSTN_GMAC_AHB 60 +#define JH7100_RSTN_SPI2AHB_AHB 61 +#define JH7100_RSTN_SPI2AHB_CORE 62 +#define JH7100_RSTN_EZMASTER_AHB 63 +#define JH7100_RST_E24 64 +#define JH7100_RSTN_QSPI_AHB 65 +#define JH7100_RSTN_QSPI_CORE 66 +#define JH7100_RSTN_QSPI_APB 67 +#define JH7100_RSTN_SEC_AHB 68 +#define JH7100_RSTN_AES 69 +#define JH7100_RSTN_PKA 70 +#define JH7100_RSTN_SHA 71 +#define JH7100_RSTN_TRNG_APB 72 +#define JH7100_RSTN_OTP_APB 73 +#define JH7100_RSTN_UART0_APB 74 +#define JH7100_RSTN_UART0_CORE 75 +#define JH7100_RSTN_UART1_APB 76 +#define JH7100_RSTN_UART1_CORE 77 +#define JH7100_RSTN_SPI0_APB 78 +#define JH7100_RSTN_SPI0_CORE 79 +#define JH7100_RSTN_SPI1_APB 80 +#define JH7100_RSTN_SPI1_CORE 81 +#define JH7100_RSTN_I2C0_APB 82 +#define JH7100_RSTN_I2C0_CORE 83 +#define JH7100_RSTN_I2C1_APB 84 +#define JH7100_RSTN_I2C1_CORE 85 +#define JH7100_RSTN_GPIO_APB 86 +#define JH7100_RSTN_UART2_APB 87 +#define JH7100_RSTN_UART2_CORE 88 +#define JH7100_RSTN_UART3_APB 89 +#define JH7100_RSTN_UART3_CORE 90 +#define JH7100_RSTN_SPI2_APB 91 +#define JH7100_RSTN_SPI2_CORE 92 +#define JH7100_RSTN_SPI3_APB 93 +#define JH7100_RSTN_SPI3_CORE 94 +#define JH7100_RSTN_I2C2_APB 95 +#define JH7100_RSTN_I2C2_CORE 96 +#define JH7100_RSTN_I2C3_APB 97 +#define JH7100_RSTN_I2C3_CORE 98 +#define JH7100_RSTN_WDTIMER_APB 99 +#define JH7100_RSTN_WDT 100 +#define JH7100_RSTN_TIMER0 101 +#define JH7100_RSTN_TIMER1 102 +#define JH7100_RSTN_TIMER2 103 +#define JH7100_RSTN_TIMER3 104 +#define JH7100_RSTN_TIMER4 105 +#define JH7100_RSTN_TIMER5 106 +#define JH7100_RSTN_TIMER6 107 +#define JH7100_RSTN_VP6INTC_APB 108 +#define JH7100_RSTN_PWM_APB 109 +#define JH7100_RSTN_MSI_APB 110 +#define JH7100_RSTN_TEMP_APB 111 +#define JH7100_RSTN_TEMP_SENSE 112 +#define JH7100_RSTN_SYSERR_APB 113 + +#define JH7100_RSTN_END 114 + +#endif /* __DT_BINDINGS_RESET_STARFIVE_JH7100_H__ */ -- cgit v1.2.3 From 3021114b3d172cf80c074c81425741f9e26c6679 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Tue, 6 Jul 2021 20:19:06 +0200 Subject: dt-bindings: pinctrl: Add StarFive pinctrl definitions Add definitons for pins and GPIO input, output and output enable signals on the StarFive JH7100 SoC. Acked-by: Rob Herring Signed-off-by: Emil Renner Berthing --- include/dt-bindings/pinctrl/pinctrl-starfive.h | 275 +++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 include/dt-bindings/pinctrl/pinctrl-starfive.h (limited to 'include') diff --git a/include/dt-bindings/pinctrl/pinctrl-starfive.h b/include/dt-bindings/pinctrl/pinctrl-starfive.h new file mode 100644 index 000000000000..de4f75c2c9e8 --- /dev/null +++ b/include/dt-bindings/pinctrl/pinctrl-starfive.h @@ -0,0 +1,275 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2021 Emil Renner Berthing + */ + +#ifndef __DT_BINDINGS_PINCTRL_STARFIVE_H__ +#define __DT_BINDINGS_PINCTRL_STARFIVE_H__ + +#define PAD_GPIO_OFFSET 0 +#define PAD_FUNC_SHARE_OFFSET 64 +#define PAD_GPIO(x) (PAD_GPIO_OFFSET + (x)) +#define PAD_FUNC_SHARE(x) (PAD_FUNC_SHARE_OFFSET + (x)) + +/* + * GPIOMUX bits: + * | 31 - 24 | 23 - 16 | 15 - 8 | 7 | 6 | 5 - 0 | + * | dout | doen | din | dout rev | doen rev | gpio nr | + * + * dout: output signal + * doen: output enable signal + * din: optional input signal, 0xff = none + * dout rev: output signal reverse bit + * doen rev: output enable signal reverse bit + * gpio nr: gpio number, 0 - 63 + */ +#define GPIOMUX(n, dout, doen, din) ( \ + (((dout) & 0x80000000) >> (31 - 7)) | (((dout) & 0xff) << 24) | \ + (((doen) & 0x80000000) >> (31 - 6)) | (((doen) & 0xff) << 16) | \ + (((din) & 0xff) << 8) | \ + ((n) & 0x3f)) + +#define GPO_REVERSE 0x80000000 + +#define GPO_LOW 0 +#define GPO_HIGH 1 +#define GPO_ENABLE 0 +#define GPO_DISABLE 1 +#define GPO_CLK_GMAC_PAPHYREF 2 +#define GPO_JTAG_TDO 3 +#define GPO_JTAG_TDO_OEN 4 +#define GPO_DMIC_CLK_OUT 5 +#define GPO_DSP_JTDOEN_PAD 6 +#define GPO_DSP_JTDO_PAD 7 +#define GPO_I2C0_PAD_SCK_OE 8 +#define GPO_I2C0_PAD_SCK_OEN (GPO_I2C0_PAD_SCK_OE | GPO_REVERSE) +#define GPO_I2C0_PAD_SDA_OE 9 +#define GPO_I2C0_PAD_SDA_OEN (GPO_I2C0_PAD_SDA_OE | GPO_REVERSE) +#define GPO_I2C1_PAD_SCK_OE 10 +#define GPO_I2C1_PAD_SCK_OEN (GPO_I2C1_PAD_SCK_OE | GPO_REVERSE) +#define GPO_I2C1_PAD_SDA_OE 11 +#define GPO_I2C1_PAD_SDA_OEN (GPO_I2C1_PAD_SDA_OE | GPO_REVERSE) +#define GPO_I2C2_PAD_SCK_OE 12 +#define GPO_I2C2_PAD_SCK_OEN (GPO_I2C2_PAD_SCK_OE | GPO_REVERSE) +#define GPO_I2C2_PAD_SDA_OE 13 +#define GPO_I2C2_PAD_SDA_OEN (GPO_I2C2_PAD_SDA_OE | GPO_REVERSE) +#define GPO_I2C3_PAD_SCK_OE 14 +#define GPO_I2C3_PAD_SCK_OEN (GPO_I2C3_PAD_SCK_OE | GPO_REVERSE) +#define GPO_I2C3_PAD_SDA_OE 15 +#define GPO_I2C3_PAD_SDA_OEN (GPO_I2C3_PAD_SDA_OE | GPO_REVERSE) +#define GPO_I2SRX_BCLK_OUT 16 +#define GPO_I2SRX_BCLK_OUT_OEN 17 +#define GPO_I2SRX_LRCK_OUT 18 +#define GPO_I2SRX_LRCK_OUT_OEN 19 +#define GPO_I2SRX_MCLK_OUT 20 +#define GPO_I2STX_BCLK_OUT 21 +#define GPO_I2STX_BCLK_OUT_OEN 22 +#define GPO_I2STX_LRCK_OUT 23 +#define GPO_I2STX_LRCK_OUT_OEN 24 +#define GPO_I2STX_MCLK_OUT 25 +#define GPO_I2STX_SDOUT0 26 +#define GPO_I2STX_SDOUT1 27 +#define GPO_LCD_PAD_CSM_N 28 +#define GPO_PWM_PAD_OE_N_BIT0 29 +#define GPO_PWM_PAD_OE_N_BIT1 30 +#define GPO_PWM_PAD_OE_N_BIT2 31 +#define GPO_PWM_PAD_OE_N_BIT3 32 +#define GPO_PWM_PAD_OE_N_BIT4 33 +#define GPO_PWM_PAD_OE_N_BIT5 34 +#define GPO_PWM_PAD_OE_N_BIT6 35 +#define GPO_PWM_PAD_OE_N_BIT7 36 +#define GPO_PWM_PAD_OUT_BIT0 37 +#define GPO_PWM_PAD_OUT_BIT1 38 +#define GPO_PWM_PAD_OUT_BIT2 39 +#define GPO_PWM_PAD_OUT_BIT3 40 +#define GPO_PWM_PAD_OUT_BIT4 41 +#define GPO_PWM_PAD_OUT_BIT5 42 +#define GPO_PWM_PAD_OUT_BIT6 43 +#define GPO_PWM_PAD_OUT_BIT7 44 +#define GPO_PWMDAC_LEFT_OUT 45 +#define GPO_PWMDAC_RIGHT_OUT 46 +#define GPO_QSPI_CSN1_OUT 47 +#define GPO_QSPI_CSN2_OUT 48 +#define GPO_QSPI_CSN3_OUT 49 +#define GPO_REGISTER23_SCFG_CMSENSOR_RST0 50 +#define GPO_REGISTER23_SCFG_CMSENSOR_RST1 51 +#define GPO_REGISTER32_SCFG_GMAC_PHY_RSTN 52 +#define GPO_SDIO0_PAD_CARD_POWER_EN 53 +#define GPO_SDIO0_PAD_CCLK_OUT 54 +#define GPO_SDIO0_PAD_CCMD_OE 55 +#define GPO_SDIO0_PAD_CCMD_OEN (GPO_SDIO0_PAD_CCMD_OE | GPO_REVERSE) +#define GPO_SDIO0_PAD_CCMD_OUT 56 +#define GPO_SDIO0_PAD_CDATA_OE_BIT0 57 +#define GPO_SDIO0_PAD_CDATA_OEN_BIT0 (GPO_SDIO0_PAD_CDATA_OE_BIT0 | GPO_REVERSE) +#define GPO_SDIO0_PAD_CDATA_OE_BIT1 58 +#define GPO_SDIO0_PAD_CDATA_OEN_BIT1 (GPO_SDIO0_PAD_CDATA_OE_BIT1 | GPO_REVERSE) +#define GPO_SDIO0_PAD_CDATA_OE_BIT2 59 +#define GPO_SDIO0_PAD_CDATA_OEN_BIT2 (GPO_SDIO0_PAD_CDATA_OE_BIT2 | GPO_REVERSE) +#define GPO_SDIO0_PAD_CDATA_OE_BIT3 60 +#define GPO_SDIO0_PAD_CDATA_OEN_BIT3 (GPO_SDIO0_PAD_CDATA_OE_BIT3 | GPO_REVERSE) +#define GPO_SDIO0_PAD_CDATA_OE_BIT4 61 +#define GPO_SDIO0_PAD_CDATA_OEN_BIT4 (GPO_SDIO0_PAD_CDATA_OE_BIT4 | GPO_REVERSE) +#define GPO_SDIO0_PAD_CDATA_OE_BIT5 62 +#define GPO_SDIO0_PAD_CDATA_OEN_BIT5 (GPO_SDIO0_PAD_CDATA_OE_BIT5 | GPO_REVERSE) +#define GPO_SDIO0_PAD_CDATA_OE_BIT6 63 +#define GPO_SDIO0_PAD_CDATA_OEN_BIT6 (GPO_SDIO0_PAD_CDATA_OE_BIT6 | GPO_REVERSE) +#define GPO_SDIO0_PAD_CDATA_OE_BIT7 64 +#define GPO_SDIO0_PAD_CDATA_OEN_BIT7 (GPO_SDIO0_PAD_CDATA_OE_BIT7 | GPO_REVERSE) +#define GPO_SDIO0_PAD_CDATA_OUT_BIT0 65 +#define GPO_SDIO0_PAD_CDATA_OUT_BIT1 66 +#define GPO_SDIO0_PAD_CDATA_OUT_BIT2 67 +#define GPO_SDIO0_PAD_CDATA_OUT_BIT3 68 +#define GPO_SDIO0_PAD_CDATA_OUT_BIT4 69 +#define GPO_SDIO0_PAD_CDATA_OUT_BIT5 70 +#define GPO_SDIO0_PAD_CDATA_OUT_BIT6 71 +#define GPO_SDIO0_PAD_CDATA_OUT_BIT7 72 +#define GPO_SDIO0_PAD_RST_N 73 +#define GPO_SDIO1_PAD_CARD_POWER_EN 74 +#define GPO_SDIO1_PAD_CCLK_OUT 75 +#define GPO_SDIO1_PAD_CCMD_OE 76 +#define GPO_SDIO1_PAD_CCMD_OEN (GPO_SDIO1_PAD_CCMD_OE | GPO_REVERSE) +#define GPO_SDIO1_PAD_CCMD_OUT 77 +#define GPO_SDIO1_PAD_CDATA_OE_BIT0 78 +#define GPO_SDIO1_PAD_CDATA_OEN_BIT0 (GPO_SDIO1_PAD_CDATA_OE_BIT0 | GPO_REVERSE) +#define GPO_SDIO1_PAD_CDATA_OE_BIT1 79 +#define GPO_SDIO1_PAD_CDATA_OEN_BIT1 (GPO_SDIO1_PAD_CDATA_OE_BIT1 | GPO_REVERSE) +#define GPO_SDIO1_PAD_CDATA_OE_BIT2 80 +#define GPO_SDIO1_PAD_CDATA_OEN_BIT2 (GPO_SDIO1_PAD_CDATA_OE_BIT2 | GPO_REVERSE) +#define GPO_SDIO1_PAD_CDATA_OE_BIT3 81 +#define GPO_SDIO1_PAD_CDATA_OEN_BIT3 (GPO_SDIO1_PAD_CDATA_OE_BIT3 | GPO_REVERSE) +#define GPO_SDIO1_PAD_CDATA_OE_BIT4 82 +#define GPO_SDIO1_PAD_CDATA_OEN_BIT4 (GPO_SDIO1_PAD_CDATA_OE_BIT4 | GPO_REVERSE) +#define GPO_SDIO1_PAD_CDATA_OE_BIT5 83 +#define GPO_SDIO1_PAD_CDATA_OEN_BIT5 (GPO_SDIO1_PAD_CDATA_OE_BIT5 | GPO_REVERSE) +#define GPO_SDIO1_PAD_CDATA_OE_BIT6 84 +#define GPO_SDIO1_PAD_CDATA_OEN_BIT6 (GPO_SDIO1_PAD_CDATA_OE_BIT6 | GPO_REVERSE) +#define GPO_SDIO1_PAD_CDATA_OE_BIT7 85 +#define GPO_SDIO1_PAD_CDATA_OEN_BIT7 (GPO_SDIO1_PAD_CDATA_OE_BIT7 | GPO_REVERSE) +#define GPO_SDIO1_PAD_CDATA_OUT_BIT0 86 +#define GPO_SDIO1_PAD_CDATA_OUT_BIT1 87 +#define GPO_SDIO1_PAD_CDATA_OUT_BIT2 88 +#define GPO_SDIO1_PAD_CDATA_OUT_BIT3 89 +#define GPO_SDIO1_PAD_CDATA_OUT_BIT4 90 +#define GPO_SDIO1_PAD_CDATA_OUT_BIT5 91 +#define GPO_SDIO1_PAD_CDATA_OUT_BIT6 92 +#define GPO_SDIO1_PAD_CDATA_OUT_BIT7 93 +#define GPO_SDIO1_PAD_RST_N 94 +#define GPO_SPDIF_TX_SDOUT 95 +#define GPO_SPDIF_TX_SDOUT_OEN 96 +#define GPO_SPI0_PAD_OE_N 97 +#define GPO_SPI0_PAD_SCK_OUT 98 +#define GPO_SPI0_PAD_SS_0_N 99 +#define GPO_SPI0_PAD_SS_1_N 100 +#define GPO_SPI0_PAD_TXD 101 +#define GPO_SPI1_PAD_OE_N 102 +#define GPO_SPI1_PAD_SCK_OUT 103 +#define GPO_SPI1_PAD_SS_0_N 104 +#define GPO_SPI1_PAD_SS_1_N 105 +#define GPO_SPI1_PAD_TXD 106 +#define GPO_SPI2_PAD_OE_N 107 +#define GPO_SPI2_PAD_SCK_OUT 108 +#define GPO_SPI2_PAD_SS_0_N 109 +#define GPO_SPI2_PAD_SS_1_N 110 +#define GPO_SPI2_PAD_TXD 111 +#define GPO_SPI2AHB_PAD_OE_N_BIT0 112 +#define GPO_SPI2AHB_PAD_OE_N_BIT1 113 +#define GPO_SPI2AHB_PAD_OE_N_BIT2 114 +#define GPO_SPI2AHB_PAD_OE_N_BIT3 115 +#define GPO_SPI2AHB_PAD_TXD_BIT0 116 +#define GPO_SPI2AHB_PAD_TXD_BIT1 117 +#define GPO_SPI2AHB_PAD_TXD_BIT2 118 +#define GPO_SPI2AHB_PAD_TXD_BIT3 119 +#define GPO_SPI3_PAD_OE_N 120 +#define GPO_SPI3_PAD_SCK_OUT 121 +#define GPO_SPI3_PAD_SS_0_N 122 +#define GPO_SPI3_PAD_SS_1_N 123 +#define GPO_SPI3_PAD_TXD 124 +#define GPO_UART0_PAD_DTRN 125 +#define GPO_UART0_PAD_RTSN 126 +#define GPO_UART0_PAD_SOUT 127 +#define GPO_UART1_PAD_SOUT 128 +#define GPO_UART2_PAD_DTR_N 129 +#define GPO_UART2_PAD_RTS_N 130 +#define GPO_UART2_PAD_SOUT 131 +#define GPO_UART3_PAD_SOUT 132 +#define GPO_USB_DRV_BUS 133 + +#define GPI_CPU_JTAG_TCK 0 +#define GPI_CPU_JTAG_TDI 1 +#define GPI_CPU_JTAG_TMS 2 +#define GPI_CPU_JTAG_TRST 3 +#define GPI_DMIC_SDIN_BIT0 4 +#define GPI_DMIC_SDIN_BIT1 5 +#define GPI_DSP_JTCK_PAD 6 +#define GPI_DSP_JTDI_PAD 7 +#define GPI_DSP_JTMS_PAD 8 +#define GPI_DSP_TRST_PAD 9 +#define GPI_I2C0_PAD_SCK_IN 10 +#define GPI_I2C0_PAD_SDA_IN 11 +#define GPI_I2C1_PAD_SCK_IN 12 +#define GPI_I2C1_PAD_SDA_IN 13 +#define GPI_I2C2_PAD_SCK_IN 14 +#define GPI_I2C2_PAD_SDA_IN 15 +#define GPI_I2C3_PAD_SCK_IN 16 +#define GPI_I2C3_PAD_SDA_IN 17 +#define GPI_I2SRX_BCLK_IN 18 +#define GPI_I2SRX_LRCK_IN 19 +#define GPI_I2SRX_SDIN_BIT0 20 +#define GPI_I2SRX_SDIN_BIT1 21 +#define GPI_I2SRX_SDIN_BIT2 22 +#define GPI_I2STX_BCLK_IN 23 +#define GPI_I2STX_LRCK_IN 24 +#define GPI_SDIO0_PAD_CARD_DETECT_N 25 +#define GPI_SDIO0_PAD_CARD_WRITE_PRT 26 +#define GPI_SDIO0_PAD_CCMD_IN 27 +#define GPI_SDIO0_PAD_CDATA_IN_BIT0 28 +#define GPI_SDIO0_PAD_CDATA_IN_BIT1 29 +#define GPI_SDIO0_PAD_CDATA_IN_BIT2 30 +#define GPI_SDIO0_PAD_CDATA_IN_BIT3 31 +#define GPI_SDIO0_PAD_CDATA_IN_BIT4 32 +#define GPI_SDIO0_PAD_CDATA_IN_BIT5 33 +#define GPI_SDIO0_PAD_CDATA_IN_BIT6 34 +#define GPI_SDIO0_PAD_CDATA_IN_BIT7 35 +#define GPI_SDIO1_PAD_CARD_DETECT_N 36 +#define GPI_SDIO1_PAD_CARD_WRITE_PRT 37 +#define GPI_SDIO1_PAD_CCMD_IN 38 +#define GPI_SDIO1_PAD_CDATA_IN_BIT0 39 +#define GPI_SDIO1_PAD_CDATA_IN_BIT1 40 +#define GPI_SDIO1_PAD_CDATA_IN_BIT2 41 +#define GPI_SDIO1_PAD_CDATA_IN_BIT3 42 +#define GPI_SDIO1_PAD_CDATA_IN_BIT4 43 +#define GPI_SDIO1_PAD_CDATA_IN_BIT5 44 +#define GPI_SDIO1_PAD_CDATA_IN_BIT6 45 +#define GPI_SDIO1_PAD_CDATA_IN_BIT7 46 +#define GPI_SPDIF_RX_SDIN 47 +#define GPI_SPI0_PAD_RXD 48 +#define GPI_SPI0_PAD_SS_IN_N 49 +#define GPI_SPI1_PAD_RXD 50 +#define GPI_SPI1_PAD_SS_IN_N 51 +#define GPI_SPI2_PAD_RXD 52 +#define GPI_SPI2_PAD_SS_IN_N 53 +#define GPI_SPI2AHB_PAD_RXD_BIT0 54 +#define GPI_SPI2AHB_PAD_RXD_BIT1 55 +#define GPI_SPI2AHB_PAD_RXD_BIT2 56 +#define GPI_SPI2AHB_PAD_RXD_BIT3 57 +#define GPI_SPI2AHB_PAD_SS_N 58 +#define GPI_SPI2AHB_SLV_SCLKIN 59 +#define GPI_SPI3_PAD_RXD 60 +#define GPI_SPI3_PAD_SS_IN_N 61 +#define GPI_UART0_PAD_CTSN 62 +#define GPI_UART0_PAD_DCDN 63 +#define GPI_UART0_PAD_DSRN 64 +#define GPI_UART0_PAD_RIN 65 +#define GPI_UART0_PAD_SIN 66 +#define GPI_UART1_PAD_SIN 67 +#define GPI_UART2_PAD_CTS_N 68 +#define GPI_UART2_PAD_DCD_N 69 +#define GPI_UART2_PAD_DSR_N 70 +#define GPI_UART2_PAD_RI_N 71 +#define GPI_UART2_PAD_SIN 72 +#define GPI_UART3_PAD_SIN 73 +#define GPI_USB_OVER_CURRENT 74 + +#define GPI_NONE 0xff + +#endif /* __DT_BINDINGS_PINCTRL_STARFIVE_H__ */ -- cgit v1.2.3 From 8a2ba1785c5803d59a63b6320ff54fd4a37a41ce Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Dec 2021 07:31:21 +0100 Subject: block: remove the nr_task field from struct io_context Nothing ever looks at ->nr_tasks, so remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20211209063131.18537-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index c1229fbd6691..82c7f4f5f4f5 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -99,7 +99,6 @@ struct io_cq { struct io_context { atomic_long_t refcount; atomic_t active_ref; - atomic_t nr_tasks; /* all the fields below are protected by this lock */ spinlock_t lock; -- cgit v1.2.3 From a411cd3cfdc5bbd1329d5b33dbf39e2b5213969d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Dec 2021 07:31:27 +0100 Subject: block: move set_task_ioprio to blk-ioc.c Keep set_task_ioprio with the other low-level code that accesses the io_context structure. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20211209063131.18537-8-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 82c7f4f5f4f5..648331f35fc6 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -116,8 +116,6 @@ struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); void exit_io_context(struct task_struct *task); -struct io_context *get_task_io_context(struct task_struct *task, - gfp_t gfp_flags, int node); int __copy_io(unsigned long clone_flags, struct task_struct *tsk); static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) { -- cgit v1.2.3 From 5ef1630586317e92c9ebd7b4ce48f393b7ff790f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Dec 2021 07:31:31 +0100 Subject: block: only build the icq tracking code when needed Only bfq needs to code to track icq, so make it conditional. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20211209063131.18537-12-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 648331f35fc6..14f7eaf1b443 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -100,16 +100,18 @@ struct io_context { atomic_long_t refcount; atomic_t active_ref; + unsigned short ioprio; + +#ifdef CONFIG_BLK_ICQ /* all the fields below are protected by this lock */ spinlock_t lock; - unsigned short ioprio; - struct radix_tree_root icq_tree; struct io_cq __rcu *icq_hint; struct hlist_head icq_list; struct work_struct release_work; +#endif /* CONFIG_BLK_ICQ */ }; struct task_struct; -- cgit v1.2.3 From 6d24d9546d6e6c524505e51bb5f76d9a83fac478 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Wed, 8 Dec 2021 10:10:35 +0100 Subject: dt-bindings: clk: qcom: Document MSM8976 Global Clock Controller Document the required properties and firmware clocks for gcc-msm8976 to operate nominally, and add header definitions for referencing the clocks from firmware. Signed-off-by: Marijn Suijten Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211208091036.132334-2-marijn.suijten@somainline.org --- include/dt-bindings/clock/qcom,gcc-msm8976.h | 240 +++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,gcc-msm8976.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-msm8976.h b/include/dt-bindings/clock/qcom,gcc-msm8976.h new file mode 100644 index 000000000000..51955fd49426 --- /dev/null +++ b/include/dt-bindings/clock/qcom,gcc-msm8976.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2016, The Linux Foundation. All rights reserved. + * Copyright (C) 2016-2021, AngeloGioacchino Del Regno + * + */ + +#ifndef _DT_BINDINGS_CLK_MSM_GCC_8976_H +#define _DT_BINDINGS_CLK_MSM_GCC_8976_H + +#define GPLL0 0 +#define GPLL2 1 +#define GPLL3 2 +#define GPLL4 3 +#define GPLL6 4 +#define GPLL0_CLK_SRC 5 +#define GPLL2_CLK_SRC 6 +#define GPLL3_CLK_SRC 7 +#define GPLL4_CLK_SRC 8 +#define GPLL6_CLK_SRC 9 +#define GCC_BLSP1_QUP1_SPI_APPS_CLK 10 +#define GCC_BLSP1_QUP1_I2C_APPS_CLK 11 +#define GCC_BLSP1_QUP2_I2C_APPS_CLK 12 +#define GCC_BLSP1_QUP2_SPI_APPS_CLK 13 +#define GCC_BLSP1_QUP3_I2C_APPS_CLK 14 +#define GCC_BLSP1_QUP3_SPI_APPS_CLK 15 +#define GCC_BLSP1_QUP4_I2C_APPS_CLK 16 +#define GCC_BLSP1_QUP4_SPI_APPS_CLK 17 +#define GCC_BLSP1_UART1_APPS_CLK 18 +#define GCC_BLSP1_UART2_APPS_CLK 19 +#define GCC_BLSP2_QUP1_I2C_APPS_CLK 20 +#define GCC_BLSP2_QUP1_SPI_APPS_CLK 21 +#define GCC_BLSP2_QUP2_I2C_APPS_CLK 22 +#define GCC_BLSP2_QUP2_SPI_APPS_CLK 23 +#define GCC_BLSP2_QUP3_I2C_APPS_CLK 24 +#define GCC_BLSP2_QUP3_SPI_APPS_CLK 25 +#define GCC_BLSP2_QUP4_I2C_APPS_CLK 26 +#define GCC_BLSP2_QUP4_SPI_APPS_CLK 27 +#define GCC_BLSP2_UART1_APPS_CLK 28 +#define GCC_BLSP2_UART2_APPS_CLK 29 +#define GCC_CAMSS_CCI_AHB_CLK 30 +#define GCC_CAMSS_CCI_CLK 31 +#define GCC_CAMSS_CPP_AHB_CLK 32 +#define GCC_CAMSS_CPP_AXI_CLK 33 +#define GCC_CAMSS_CPP_CLK 34 +#define GCC_CAMSS_CSI0_AHB_CLK 35 +#define GCC_CAMSS_CSI0_CLK 36 +#define GCC_CAMSS_CSI0PHY_CLK 37 +#define GCC_CAMSS_CSI0PIX_CLK 38 +#define GCC_CAMSS_CSI0RDI_CLK 39 +#define GCC_CAMSS_CSI1_AHB_CLK 40 +#define GCC_CAMSS_CSI1_CLK 41 +#define GCC_CAMSS_CSI1PHY_CLK 42 +#define GCC_CAMSS_CSI1PIX_CLK 43 +#define GCC_CAMSS_CSI1RDI_CLK 44 +#define GCC_CAMSS_CSI2_AHB_CLK 45 +#define GCC_CAMSS_CSI2_CLK 46 +#define GCC_CAMSS_CSI2PHY_CLK 47 +#define GCC_CAMSS_CSI2PIX_CLK 48 +#define GCC_CAMSS_CSI2RDI_CLK 49 +#define GCC_CAMSS_CSI_VFE0_CLK 50 +#define GCC_CAMSS_CSI_VFE1_CLK 51 +#define GCC_CAMSS_GP0_CLK 52 +#define GCC_CAMSS_GP1_CLK 53 +#define GCC_CAMSS_ISPIF_AHB_CLK 54 +#define GCC_CAMSS_JPEG0_CLK 55 +#define GCC_CAMSS_JPEG_AHB_CLK 56 +#define GCC_CAMSS_JPEG_AXI_CLK 57 +#define GCC_CAMSS_MCLK0_CLK 58 +#define GCC_CAMSS_MCLK1_CLK 59 +#define GCC_CAMSS_MCLK2_CLK 60 +#define GCC_CAMSS_MICRO_AHB_CLK 61 +#define GCC_CAMSS_CSI0PHYTIMER_CLK 62 +#define GCC_CAMSS_CSI1PHYTIMER_CLK 63 +#define GCC_CAMSS_AHB_CLK 64 +#define GCC_CAMSS_TOP_AHB_CLK 65 +#define GCC_CAMSS_VFE0_CLK 66 +#define GCC_CAMSS_VFE_AHB_CLK 67 +#define GCC_CAMSS_VFE_AXI_CLK 68 +#define GCC_CAMSS_VFE1_AHB_CLK 69 +#define GCC_CAMSS_VFE1_AXI_CLK 70 +#define GCC_CAMSS_VFE1_CLK 71 +#define GCC_DCC_CLK 72 +#define GCC_GP1_CLK 73 +#define GCC_GP2_CLK 74 +#define GCC_GP3_CLK 75 +#define GCC_MDSS_AHB_CLK 76 +#define GCC_MDSS_AXI_CLK 77 +#define GCC_MDSS_ESC0_CLK 78 +#define GCC_MDSS_ESC1_CLK 79 +#define GCC_MDSS_MDP_CLK 80 +#define GCC_MDSS_VSYNC_CLK 81 +#define GCC_MSS_CFG_AHB_CLK 82 +#define GCC_MSS_Q6_BIMC_AXI_CLK 83 +#define GCC_PDM2_CLK 84 +#define GCC_PRNG_AHB_CLK 85 +#define GCC_PDM_AHB_CLK 86 +#define GCC_RBCPR_GFX_AHB_CLK 87 +#define GCC_RBCPR_GFX_CLK 88 +#define GCC_SDCC1_AHB_CLK 89 +#define GCC_SDCC1_APPS_CLK 90 +#define GCC_SDCC1_ICE_CORE_CLK 91 +#define GCC_SDCC2_AHB_CLK 92 +#define GCC_SDCC2_APPS_CLK 93 +#define GCC_SDCC3_AHB_CLK 94 +#define GCC_SDCC3_APPS_CLK 95 +#define GCC_USB2A_PHY_SLEEP_CLK 96 +#define GCC_USB_HS_PHY_CFG_AHB_CLK 97 +#define GCC_USB_FS_AHB_CLK 98 +#define GCC_USB_FS_IC_CLK 99 +#define GCC_USB_FS_SYSTEM_CLK 100 +#define GCC_USB_HS_AHB_CLK 101 +#define GCC_USB_HS_SYSTEM_CLK 102 +#define GCC_VENUS0_AHB_CLK 103 +#define GCC_VENUS0_AXI_CLK 104 +#define GCC_VENUS0_CORE0_VCODEC0_CLK 105 +#define GCC_VENUS0_CORE1_VCODEC0_CLK 106 +#define GCC_VENUS0_VCODEC0_CLK 107 +#define GCC_APSS_AHB_CLK 108 +#define GCC_APSS_AXI_CLK 109 +#define GCC_BLSP1_AHB_CLK 110 +#define GCC_BLSP2_AHB_CLK 111 +#define GCC_BOOT_ROM_AHB_CLK 112 +#define GCC_CRYPTO_AHB_CLK 113 +#define GCC_CRYPTO_AXI_CLK 114 +#define GCC_CRYPTO_CLK 115 +#define GCC_CPP_TBU_CLK 116 +#define GCC_APSS_TCU_CLK 117 +#define GCC_JPEG_TBU_CLK 118 +#define GCC_MDP_RT_TBU_CLK 119 +#define GCC_MDP_TBU_CLK 120 +#define GCC_SMMU_CFG_CLK 121 +#define GCC_VENUS_1_TBU_CLK 122 +#define GCC_VENUS_TBU_CLK 123 +#define GCC_VFE1_TBU_CLK 124 +#define GCC_VFE_TBU_CLK 125 +#define GCC_APS_0_CLK 126 +#define GCC_APS_1_CLK 127 +#define APS_0_CLK_SRC 128 +#define APS_1_CLK_SRC 129 +#define APSS_AHB_CLK_SRC 130 +#define BLSP1_QUP1_I2C_APPS_CLK_SRC 131 +#define BLSP1_QUP1_SPI_APPS_CLK_SRC 132 +#define BLSP1_QUP2_I2C_APPS_CLK_SRC 133 +#define BLSP1_QUP2_SPI_APPS_CLK_SRC 134 +#define BLSP1_QUP3_I2C_APPS_CLK_SRC 135 +#define BLSP1_QUP3_SPI_APPS_CLK_SRC 136 +#define BLSP1_QUP4_I2C_APPS_CLK_SRC 137 +#define BLSP1_QUP4_SPI_APPS_CLK_SRC 138 +#define BLSP1_UART1_APPS_CLK_SRC 139 +#define BLSP1_UART2_APPS_CLK_SRC 140 +#define BLSP2_QUP1_I2C_APPS_CLK_SRC 141 +#define BLSP2_QUP1_SPI_APPS_CLK_SRC 142 +#define BLSP2_QUP2_I2C_APPS_CLK_SRC 143 +#define BLSP2_QUP2_SPI_APPS_CLK_SRC 144 +#define BLSP2_QUP3_I2C_APPS_CLK_SRC 145 +#define BLSP2_QUP3_SPI_APPS_CLK_SRC 146 +#define BLSP2_QUP4_I2C_APPS_CLK_SRC 147 +#define BLSP2_QUP4_SPI_APPS_CLK_SRC 148 +#define BLSP2_UART1_APPS_CLK_SRC 149 +#define BLSP2_UART2_APPS_CLK_SRC 150 +#define CCI_CLK_SRC 151 +#define CPP_CLK_SRC 152 +#define CSI0_CLK_SRC 153 +#define CSI1_CLK_SRC 154 +#define CSI2_CLK_SRC 155 +#define CAMSS_GP0_CLK_SRC 156 +#define CAMSS_GP1_CLK_SRC 157 +#define JPEG0_CLK_SRC 158 +#define MCLK0_CLK_SRC 159 +#define MCLK1_CLK_SRC 160 +#define MCLK2_CLK_SRC 161 +#define CSI0PHYTIMER_CLK_SRC 162 +#define CSI1PHYTIMER_CLK_SRC 163 +#define CAMSS_TOP_AHB_CLK_SRC 164 +#define VFE0_CLK_SRC 165 +#define VFE1_CLK_SRC 166 +#define CRYPTO_CLK_SRC 167 +#define GP1_CLK_SRC 168 +#define GP2_CLK_SRC 169 +#define GP3_CLK_SRC 170 +#define ESC0_CLK_SRC 171 +#define ESC1_CLK_SRC 172 +#define MDP_CLK_SRC 173 +#define VSYNC_CLK_SRC 174 +#define PDM2_CLK_SRC 175 +#define RBCPR_GFX_CLK_SRC 176 +#define SDCC1_APPS_CLK_SRC 177 +#define SDCC1_ICE_CORE_CLK_SRC 178 +#define SDCC2_APPS_CLK_SRC 179 +#define SDCC3_APPS_CLK_SRC 180 +#define USB_FS_IC_CLK_SRC 181 +#define USB_FS_SYSTEM_CLK_SRC 182 +#define USB_HS_SYSTEM_CLK_SRC 183 +#define VCODEC0_CLK_SRC 184 +#define GCC_MDSS_BYTE0_CLK_SRC 185 +#define GCC_MDSS_BYTE1_CLK_SRC 186 +#define GCC_MDSS_BYTE0_CLK 187 +#define GCC_MDSS_BYTE1_CLK 188 +#define GCC_MDSS_PCLK0_CLK_SRC 189 +#define GCC_MDSS_PCLK1_CLK_SRC 190 +#define GCC_MDSS_PCLK0_CLK 191 +#define GCC_MDSS_PCLK1_CLK 192 +#define GCC_GFX3D_CLK_SRC 193 +#define GCC_GFX3D_OXILI_CLK 194 +#define GCC_GFX3D_BIMC_CLK 195 +#define GCC_GFX3D_OXILI_AHB_CLK 196 +#define GCC_GFX3D_OXILI_AON_CLK 197 +#define GCC_GFX3D_OXILI_GMEM_CLK 198 +#define GCC_GFX3D_OXILI_TIMER_CLK 199 +#define GCC_GFX3D_TBU0_CLK 200 +#define GCC_GFX3D_TBU1_CLK 201 +#define GCC_GFX3D_TCU_CLK 202 +#define GCC_GFX3D_GTCU_AHB_CLK 203 + +/* GCC block resets */ +#define RST_CAMSS_MICRO_BCR 0 +#define RST_USB_HS_BCR 1 +#define RST_QUSB2_PHY_BCR 2 +#define RST_USB2_HS_PHY_ONLY_BCR 3 +#define RST_USB_HS_PHY_CFG_AHB_BCR 4 +#define RST_USB_FS_BCR 5 +#define RST_CAMSS_CSI1PIX_BCR 6 +#define RST_CAMSS_CSI_VFE1_BCR 7 +#define RST_CAMSS_VFE1_BCR 8 +#define RST_CAMSS_CPP_BCR 9 + +/* GDSCs */ +#define VENUS_GDSC 0 +#define VENUS_CORE0_GDSC 1 +#define VENUS_CORE1_GDSC 2 +#define MDSS_GDSC 3 +#define JPEG_GDSC 4 +#define VFE0_GDSC 5 +#define VFE1_GDSC 6 +#define CPP_GDSC 7 +#define OXILI_GX_GDSC 8 +#define OXILI_CX_GDSC 9 + +#endif /* _DT_BINDINGS_CLK_MSM_GCC_8976_H */ -- cgit v1.2.3 From 85f5a74c2b9ba213d4102dc12ccbfdbe26472abb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 8 Apr 2021 01:33:45 -0400 Subject: block: Add bio_add_folio() This is a thin wrapper around bio_add_page(). The main advantage here is the documentation that folios larger than 2GiB are not supported. It's not currently possible to allocate folios that large, but if it ever becomes possible, this function will fail gracefully instead of doing I/O to the wrong bytes. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- include/linux/bio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index fe6bdfbbef66..a783cac49978 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -409,7 +409,8 @@ extern void bio_uninit(struct bio *); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); -extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); +int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); +bool bio_add_folio(struct bio *, struct folio *, size_t len, size_t off); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); int bio_add_zone_append_page(struct bio *bio, struct page *page, -- cgit v1.2.3 From 640d1930bef4f87ec8d8d2b05f0f6edc1dfcf662 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Jan 2021 10:58:17 -0500 Subject: block: Add bio_for_each_folio_all() Allow callers to iterate over each folio instead of each page. The bio need not have been constructed using folios originally. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- include/linux/bio.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index a783cac49978..e3c9e8207f12 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -166,7 +166,7 @@ static inline void bio_advance(struct bio *bio, unsigned int nbytes) */ #define bio_for_each_bvec_all(bvl, bio, i) \ for (i = 0, bvl = bio_first_bvec_all(bio); \ - i < (bio)->bi_vcnt; i++, bvl++) \ + i < (bio)->bi_vcnt; i++, bvl++) #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) @@ -260,6 +260,57 @@ static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) return &bio->bi_io_vec[bio->bi_vcnt - 1]; } +/** + * struct folio_iter - State for iterating all folios in a bio. + * @folio: The current folio we're iterating. NULL after the last folio. + * @offset: The byte offset within the current folio. + * @length: The number of bytes in this iteration (will not cross folio + * boundary). + */ +struct folio_iter { + struct folio *folio; + size_t offset; + size_t length; + /* private: for use by the iterator */ + size_t _seg_count; + int _i; +}; + +static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, + int i) +{ + struct bio_vec *bvec = bio_first_bvec_all(bio) + i; + + fi->folio = page_folio(bvec->bv_page); + fi->offset = bvec->bv_offset + + PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + fi->_seg_count = bvec->bv_len; + fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); + fi->_i = i; +} + +static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) +{ + fi->_seg_count -= fi->length; + if (fi->_seg_count) { + fi->folio = folio_next(fi->folio); + fi->offset = 0; + fi->length = min(folio_size(fi->folio), fi->_seg_count); + } else if (fi->_i + 1 < bio->bi_vcnt) { + bio_first_folio(fi, bio, fi->_i + 1); + } else { + fi->folio = NULL; + } +} + +/** + * bio_for_each_folio_all - Iterate over each folio in a bio. + * @fi: struct folio_iter which is updated for each folio. + * @bio: struct bio to iterate over. + */ +#define bio_for_each_folio_all(fi, bio) \ + for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio)) + enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ -- cgit v1.2.3 From 8306a5f56305521d8b307b4ee1f69949fbb49279 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Apr 2021 07:51:36 -0400 Subject: iomap: Add iomap_invalidate_folio Keep iomap_invalidatepage around as a wrapper for use in address_space operations. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6d1b08d0ae93..29491fb9c5ba 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -225,6 +225,7 @@ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); int iomap_is_partially_uptodate(struct page *page, unsigned long from, unsigned long count); int iomap_releasepage(struct page *page, gfp_t gfp_mask); +void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); void iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len); #ifdef CONFIG_MIGRATION -- cgit v1.2.3 From ea6fa4961aab8f90a8aa03575a98b4bda368d4b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Fri, 10 Dec 2021 21:01:26 +0100 Subject: rtc: mc146818-lib: fix RTC presence check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To prevent an infinite loop in mc146818_get_time(), commit 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs") added a check for RTC availability. Together with a later fix, it checked if bit 6 in register 0x0d is cleared. This, however, caused a false negative on a motherboard with an AMD SB710 southbridge; according to the specification [1], bit 6 of register 0x0d of this chipset is a scratchbit. This caused a regression in Linux 5.11 - the RTC was determined broken by the kernel and not used by rtc-cmos.c [3]. This problem was also reported in Fedora [4]. As a better alternative, check whether the UIP ("Update-in-progress") bit is set for longer then 10ms. If that is the case, then apparently the RTC is either absent (and all register reads return 0xff) or broken. Also limit the number of loop iterations in mc146818_get_time() to 10 to prevent an infinite loop there. The functions mc146818_get_time() and mc146818_does_rtc_work() will be refactored later in this patch series, in order to fix a separate problem with reading / setting the RTC alarm time. This is done so to avoid a confusion about what is being fixed when. In a previous approach to this problem, I implemented a check whether the RTC_HOURS register contains a value <= 24. This, however, sometimes did not work correctly on my Intel Kaby Lake laptop. According to Intel's documentation [2], "the time and date RAM locations (0-9) are disconnected from the external bus" during the update cycle so reading this register without checking the UIP bit is incorrect. [1] AMD SB700/710/750 Register Reference Guide, page 308, https://developer.amd.com/wordpress/media/2012/10/43009_sb7xx_rrg_pub_1.00.pdf [2] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] Datasheet Volume 1 of 2, page 209 Intel's Document Number: 334658-006, https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf [3] Functions in arch/x86/kernel/rtc.c apparently were using it. [4] https://bugzilla.redhat.com/show_bug.cgi?id=1936688 Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs") Fixes: ebb22a059436 ("rtc: mc146818: Dont test for bit 0-5 in Register D") Signed-off-by: Mateusz Jończyk Cc: Thomas Gleixner Cc: Alessandro Zummo Cc: Alexandre Belloni Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210200131.153887-5-mat.jonczyk@o2.pl --- include/linux/mc146818rtc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 0661af17a758..69c80c4325bf 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -123,6 +123,7 @@ struct cmos_rtc_board_info { #define RTC_IO_EXTENT_USED RTC_IO_EXTENT #endif /* ARCH_RTC_LOCATION */ +bool mc146818_does_rtc_work(void); unsigned int mc146818_get_time(struct rtc_time *time); int mc146818_set_time(struct rtc_time *time); -- cgit v1.2.3 From ec5895c0f2d87b9bf4185db1915e40fa6fcfc0ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Fri, 10 Dec 2021 21:01:27 +0100 Subject: rtc: mc146818-lib: extract mc146818_avoid_UIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function mc146818_get_time() contains an elaborate mechanism of reading the RTC time while no RTC update is in progress. It turns out that reading the RTC alarm clock also requires avoiding the RTC update. Therefore, the mechanism in mc146818_get_time() should be reused - so extract it into a separate function. The logic in mc146818_avoid_UIP() is same as in mc146818_get_time() except that after every if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { there is now "mdelay(1)". To avoid producing a very unreadable patch, mc146818_get_time() will be refactored to use mc146818_avoid_UIP() in the next patch. Signed-off-by: Mateusz Jończyk Cc: Alessandro Zummo Cc: Alexandre Belloni Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20211210200131.153887-6-mat.jonczyk@o2.pl --- include/linux/mc146818rtc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 69c80c4325bf..67fb0a12becc 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -127,4 +127,7 @@ bool mc146818_does_rtc_work(void); unsigned int mc146818_get_time(struct rtc_time *time); int mc146818_set_time(struct rtc_time *time); +bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + void *param); + #endif /* _MC146818RTC_H */ -- cgit v1.2.3 From 34fff62827b254f8a43633cc878deb04bf11297c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:18:54 +0100 Subject: device: Move MSI related data into a struct The only unconditional part of MSI data in struct device is the irqdomain pointer. Everything else can be allocated on demand. Create a data structure and move the irqdomain pointer into it. The other MSI specific parts are going to be removed from struct device in later steps. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211210221813.617178827@linutronix.de --- include/linux/device.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 2a22875238a6..f212b7a7b156 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -371,6 +371,16 @@ struct dev_links_info { enum dl_dev_state status; }; +/** + * struct dev_msi_info - Device data related to MSI + * @domain: The MSI interrupt domain associated to the device + */ +struct dev_msi_info { +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN + struct irq_domain *domain; +#endif +}; + /** * struct device - The basic device structure * @parent: The device's "parent" device, the device to which it is attached. @@ -407,8 +417,8 @@ struct dev_links_info { * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pin-control.rst for details. + * @msi: MSI related data * @msi_list: Hosts MSI descriptors - * @msi_domain: The generic MSI domain this device is using. * @numa_node: NUMA node this device is close to. * @dma_ops: DMA mapping operations for this device. * @dma_mask: Dma mask (if dma'ble device). @@ -500,12 +510,10 @@ struct device { struct em_perf_domain *em_pd; #endif -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - struct irq_domain *msi_domain; -#endif #ifdef CONFIG_PINCTRL struct dev_pin_info *pins; #endif + struct dev_msi_info msi; #ifdef CONFIG_GENERIC_MSI_IRQ struct list_head msi_list; #endif @@ -666,7 +674,7 @@ static inline void set_dev_node(struct device *dev, int node) static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) { #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - return dev->msi_domain; + return dev->msi.domain; #else return NULL; #endif @@ -675,7 +683,7 @@ static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d) { #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - dev->msi_domain = d; + dev->msi.domain = d; #endif } -- cgit v1.2.3 From 013bd8e543c2c777b586cf033c588ea82bd502db Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:18:55 +0100 Subject: device: Add device:: Msi_data pointer and struct msi_device_data Create struct msi_device_data and add a pointer of that type to struct dev_msi_info, which is part of struct device. Provide an allocator function which can be invoked from the MSI interrupt allocation code pathes. Add a properties field to the data structure as a first member so the allocation size is not zero bytes. The field will be uses later on. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221813.676660809@linutronix.de --- include/linux/device.h | 5 +++++ include/linux/msi.h | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index f212b7a7b156..f0033cd93631 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -45,6 +45,7 @@ struct iommu_ops; struct iommu_group; struct dev_pin_info; struct dev_iommu; +struct msi_device_data; /** * struct subsys_interface - interfaces to device functions @@ -374,11 +375,15 @@ struct dev_links_info { /** * struct dev_msi_info - Device data related to MSI * @domain: The MSI interrupt domain associated to the device + * @data: Pointer to MSI device data */ struct dev_msi_info { #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN struct irq_domain *domain; #endif +#ifdef CONFIG_GENERIC_MSI_IRQ + struct msi_device_data *data; +#endif }; /** diff --git a/include/linux/msi.h b/include/linux/msi.h index ba4a39c430b5..7e4c8fd7c65d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -171,6 +171,16 @@ struct msi_desc { }; }; +/** + * msi_device_data - MSI per device data + * @properties: MSI properties which are interesting to drivers + */ +struct msi_device_data { + unsigned long properties; +}; + +int msi_setup_device_data(struct device *dev); + /* Helpers to hide struct msi_desc implementation details */ #define msi_desc_to_dev(desc) ((desc)->dev) #define dev_to_msi_list(dev) (&(dev)->msi_list) @@ -233,10 +243,16 @@ void pci_msi_mask_irq(struct irq_data *data); void pci_msi_unmask_irq(struct irq_data *data); #ifdef CONFIG_SYSFS +int msi_device_populate_sysfs(struct device *dev); +void msi_device_destroy_sysfs(struct device *dev); + const struct attribute_group **msi_populate_sysfs(struct device *dev); void msi_destroy_sysfs(struct device *dev, const struct attribute_group **msi_irq_groups); #else +static inline int msi_device_populate_sysfs(struct device *dev) { return 0; } +static inline void msi_device_destroy_sysfs(struct device *dev) { } + static inline const struct attribute_group **msi_populate_sysfs(struct device *dev) { return NULL; @@ -384,6 +400,8 @@ enum { MSI_FLAG_MUST_REACTIVATE = (1 << 5), /* Is level-triggered capable, using two messages */ MSI_FLAG_LEVEL_CAPABLE = (1 << 6), + /* Populate sysfs on alloc() and destroy it on free() */ + MSI_FLAG_DEV_SYSFS = (1 << 7), }; int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, -- cgit v1.2.3 From 3f35d2cf9fbc656db82579d849cc69c373b1ad0d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Dec 2021 18:16:44 +0100 Subject: PCI/MSI: Decouple MSI[-X] disable from pcim_release() The MSI core will introduce runtime allocation of MSI related data. This data will be devres managed and has to be set up before enabling PCI/MSI[-X]. This would introduce an ordering issue vs. pcim_release(). The setup order is: pcim_enable_device() devres_alloc(pcim_release...); ... pci_irq_alloc() msi_setup_device_data() devres_alloc(msi_device_data_release, ...) and once the device is released these release functions are invoked in the opposite order: msi_device_data_release() ... pcim_release() pci_disable_msi[x]() which is obviously wrong, because pci_disable_msi[x]() requires the MSI data to be available to tear down the MSI[-X] interrupts. Remove the MSI[-X] teardown from pcim_release() and add an explicit action to be installed on the attempt of enabling PCI/MSI[-X]. This allows the MSI core data allocation to be ordered correctly in a subsequent step. Reported-by: Nishanth Menon Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/87tuf9rdoj.ffs@tglx --- include/linux/pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 5cc46baef519..a09736d3e05e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -425,7 +425,8 @@ struct pci_dev { unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ - unsigned int is_managed:1; + unsigned int is_managed:1; /* Managed via devres */ + unsigned int is_msi_managed:1; /* MSI release via devres installed */ unsigned int needs_freset:1; /* Requires fundamental reset */ unsigned int state_saved:1; unsigned int is_physfn:1; -- cgit v1.2.3 From bf6e054e0e3fbc9614355b760e18c8a14f952a4e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:03 +0100 Subject: genirq/msi: Provide msi_device_populate/destroy_sysfs() Add new allocation functions which can be activated by domain info flags. They store the groups pointer in struct msi_device_data. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221813.988659194@linutronix.de --- include/linux/msi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 7e4c8fd7c65d..1b96dc483b88 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -56,6 +56,8 @@ struct irq_data; struct msi_desc; struct pci_dev; struct platform_msi_priv_data; +struct attribute_group; + void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); #ifdef CONFIG_GENERIC_MSI_IRQ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); @@ -174,9 +176,11 @@ struct msi_desc { /** * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers + * @attrs: Pointer to the sysfs attribute group */ struct msi_device_data { unsigned long properties; + const struct attribute_group **attrs; }; int msi_setup_device_data(struct device *dev); -- cgit v1.2.3 From ffd84485e6beb9cad3e5a133d88201b995298c33 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:05 +0100 Subject: PCI/MSI: Let the irq code handle sysfs groups Set the domain info flag which makes the core code handle sysfs groups and put an explicit invocation into the legacy code. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20211210221814.048612053@linutronix.de --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index a09736d3e05e..0a7b6b2f163b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -476,7 +476,6 @@ struct pci_dev { #ifdef CONFIG_PCI_MSI void __iomem *msix_base; raw_spinlock_t msi_lock; - const struct attribute_group **msi_irq_groups; #endif struct pci_vpd vpd; #ifdef CONFIG_PCIE_DPC -- cgit v1.2.3 From 24cff375fdb663c2238f06693a067b9219596fdc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:08 +0100 Subject: genirq/msi: Remove the original sysfs interfaces No more users. Refactor the core code accordingly and move the global interface under CONFIG_PCI_MSI_ARCH_FALLBACKS. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221814.168362229@linutronix.de --- include/linux/msi.h | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 1b96dc483b88..634a12962e72 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -249,22 +249,10 @@ void pci_msi_unmask_irq(struct irq_data *data); #ifdef CONFIG_SYSFS int msi_device_populate_sysfs(struct device *dev); void msi_device_destroy_sysfs(struct device *dev); - -const struct attribute_group **msi_populate_sysfs(struct device *dev); -void msi_destroy_sysfs(struct device *dev, - const struct attribute_group **msi_irq_groups); -#else +#else /* CONFIG_SYSFS */ static inline int msi_device_populate_sysfs(struct device *dev) { return 0; } static inline void msi_device_destroy_sysfs(struct device *dev) { } - -static inline const struct attribute_group **msi_populate_sysfs(struct device *dev) -{ - return NULL; -} -static inline void msi_destroy_sysfs(struct device *dev, const struct attribute_group **msi_irq_groups) -{ -} -#endif +#endif /* !CONFIG_SYSFS */ /* * The arch hooks to setup up msi irqs. Default functions are implemented @@ -279,7 +267,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); void arch_teardown_msi_irq(unsigned int irq); int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void arch_teardown_msi_irqs(struct pci_dev *dev); -#endif +#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */ /* * The restore hook is still available even for fully irq domain based -- cgit v1.2.3 From 9835cec6d557b0bff3d48bd91cd0484aba59386c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:09 +0100 Subject: platform-msi: Rename functions and clarify comments It's hard to distinguish what platform_msi_domain_alloc() and platform_msi_domain_alloc_irqs() are about. Make the distinction more explicit and add comments which explain the use cases properly. Signed-off-by: Thomas Gleixner Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221814.228706214@linutronix.de --- include/linux/msi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 634a12962e72..12dd28629c43 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -435,10 +435,10 @@ __platform_msi_create_device_domain(struct device *dev, #define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \ __platform_msi_create_device_domain(dev, nvec, true, write, ops, data) -int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs); -void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nvec); +int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); +void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ -- cgit v1.2.3 From fc22e7dbcdb3e06a3d3ce05fc91c6a2345411f9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:11 +0100 Subject: platform-msi: Store platform private data pointer in msi_device_data Storing the platform private data in a MSI descriptor is sloppy at best. The data belongs to the device and not to the descriptor. Add a pointer to struct msi_device_data and store the pointer there. Signed-off-by: Thomas Gleixner Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221814.287680528@linutronix.de --- include/linux/msi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 12dd28629c43..cdf0d09c3ad4 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -108,11 +108,9 @@ struct pci_msi_desc { /** * platform_msi_desc - Platform device specific msi descriptor data - * @msi_priv_data: Pointer to platform private data * @msi_index: The index of the MSI descriptor for multi MSI */ struct platform_msi_desc { - struct platform_msi_priv_data *msi_priv_data; u16 msi_index; }; @@ -177,10 +175,12 @@ struct msi_desc { * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers * @attrs: Pointer to the sysfs attribute group + * @platform_data: Platform-MSI specific data */ struct msi_device_data { unsigned long properties; const struct attribute_group **attrs; + struct platform_msi_priv_data *platform_data; }; int msi_setup_device_data(struct device *dev); -- cgit v1.2.3 From 20c6d424cfe641659c9a025db8a8608701b27246 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:12 +0100 Subject: genirq/msi: Consolidate MSI descriptor data All non PCI/MSI usage variants have data structures in struct msi_desc with only one member: xxx_index. PCI/MSI has a entry_nr member. Add a common msi_index member to struct msi_desc so all implementations can share it which allows further consolidation. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221814.350967317@linutronix.de --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index cdf0d09c3ad4..ee8fe49dedd1 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -143,6 +143,7 @@ struct ti_sci_inta_msi_desc { * address or data changes * @write_msi_msg_data: Data parameter for the callback. * + * @msi_index: Index of the msi descriptor * @pci: [PCI] PCI speficic msi descriptor data * @platform: [platform] Platform device specific msi descriptor data * @fsl_mc: [fsl-mc] FSL MC device specific msi descriptor data @@ -163,6 +164,7 @@ struct msi_desc { void (*write_msi_msg)(struct msi_desc *entry, void *data); void *write_msi_msg_data; + u16 msi_index; union { struct pci_msi_desc pci; struct platform_msi_desc platform; -- cgit v1.2.3 From dba27c7fa36f468e7eb29b216879f8c33bf0955d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:14 +0100 Subject: platform-msi: Use msi_desc::msi_index Use the common msi_index member and get rid of the pointless wrapper struct. Signed-off-by: Thomas Gleixner Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221814.413638645@linutronix.de --- include/linux/msi.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index ee8fe49dedd1..1d85e954e130 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -106,14 +106,6 @@ struct pci_msi_desc { }; }; -/** - * platform_msi_desc - Platform device specific msi descriptor data - * @msi_index: The index of the MSI descriptor for multi MSI - */ -struct platform_msi_desc { - u16 msi_index; -}; - /** * fsl_mc_msi_desc - FSL-MC device specific msi descriptor data * @msi_index: The index of the MSI descriptor @@ -145,7 +137,6 @@ struct ti_sci_inta_msi_desc { * * @msi_index: Index of the msi descriptor * @pci: [PCI] PCI speficic msi descriptor data - * @platform: [platform] Platform device specific msi descriptor data * @fsl_mc: [fsl-mc] FSL MC device specific msi descriptor data * @inta: [INTA] TISCI based INTA specific msi descriptor data */ @@ -167,7 +158,6 @@ struct msi_desc { u16 msi_index; union { struct pci_msi_desc pci; - struct platform_msi_desc platform; struct fsl_mc_msi_desc fsl_mc; struct ti_sci_inta_msi_desc inta; }; -- cgit v1.2.3 From 78ee9fb4b8b126ed84a819a6e1732fd3039b525a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:15 +0100 Subject: bus: fsl-mc-msi: Use msi_desc::msi_index Use the common msi_index member and get rid of the pointless wrapper struct. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221814.477386185@linutronix.de --- include/linux/msi.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 1d85e954e130..25edf83ede41 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -106,14 +106,6 @@ struct pci_msi_desc { }; }; -/** - * fsl_mc_msi_desc - FSL-MC device specific msi descriptor data - * @msi_index: The index of the MSI descriptor - */ -struct fsl_mc_msi_desc { - u16 msi_index; -}; - /** * ti_sci_inta_msi_desc - TISCI based INTA specific msi descriptor data * @dev_index: TISCI device index @@ -137,7 +129,6 @@ struct ti_sci_inta_msi_desc { * * @msi_index: Index of the msi descriptor * @pci: [PCI] PCI speficic msi descriptor data - * @fsl_mc: [fsl-mc] FSL MC device specific msi descriptor data * @inta: [INTA] TISCI based INTA specific msi descriptor data */ struct msi_desc { @@ -158,7 +149,6 @@ struct msi_desc { u16 msi_index; union { struct pci_msi_desc pci; - struct fsl_mc_msi_desc fsl_mc; struct ti_sci_inta_msi_desc inta; }; }; -- cgit v1.2.3 From 0f18095871fc59c89a281caf6f18538cf9e50fbf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:17 +0100 Subject: soc: ti: ti_sci_inta_msi: Use msi_desc::msi_index Use the common msi_index member and get rid of the pointless wrapper struct. Signed-off-by: Thomas Gleixner Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20211210221814.540704224@linutronix.de --- include/linux/msi.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 25edf83ede41..45ec5d07a5f3 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -106,14 +106,6 @@ struct pci_msi_desc { }; }; -/** - * ti_sci_inta_msi_desc - TISCI based INTA specific msi descriptor data - * @dev_index: TISCI device index - */ -struct ti_sci_inta_msi_desc { - u16 dev_index; -}; - /** * struct msi_desc - Descriptor structure for MSI based interrupts * @list: List head for management @@ -128,8 +120,7 @@ struct ti_sci_inta_msi_desc { * @write_msi_msg_data: Data parameter for the callback. * * @msi_index: Index of the msi descriptor - * @pci: [PCI] PCI speficic msi descriptor data - * @inta: [INTA] TISCI based INTA specific msi descriptor data + * @pci: PCI specific msi descriptor data */ struct msi_desc { /* Shared device/bus type independent data */ @@ -147,10 +138,7 @@ struct msi_desc { void *write_msi_msg_data; u16 msi_index; - union { - struct pci_msi_desc pci; - struct ti_sci_inta_msi_desc inta; - }; + struct pci_msi_desc pci; }; /** -- cgit v1.2.3 From 173ffad79d177d9a91fbf3be6bf67ca81e0f765a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:18 +0100 Subject: PCI/MSI: Use msi_desc::msi_index The usage of msi_desc::pci::entry_nr is confusing at best. It's the index into the MSI[X] descriptor table. Use msi_desc::msi_index which is shared between all MSI incarnations instead of having a PCI specific storage for no value. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20211210221814.602911509@linutronix.de --- include/linux/msi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 45ec5d07a5f3..b3d3b0bf59fe 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -80,7 +80,6 @@ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, * @multi_cap: [PCI MSI/X] log2 num of messages supported * @can_mask: [PCI MSI/X] Masking supported? * @is_64: [PCI MSI/X] Address size: 0=32bit 1=64bit - * @entry_nr: [PCI MSI/X] Entry which is described by this descriptor * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq * @mask_pos: [PCI MSI] Mask register position * @mask_base: [PCI MSI-X] Mask register base address @@ -97,7 +96,6 @@ struct pci_msi_desc { u8 can_mask : 1; u8 is_64 : 1; u8 is_virtual : 1; - u16 entry_nr; unsigned default_irq; } msi_attrib; union { -- cgit v1.2.3 From 7a823443e9b4ed1ff4a3026d184f09d23fd6d9c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:20 +0100 Subject: PCI/MSI: Provide MSI_FLAG_MSIX_CONTIGUOUS Provide a domain info flag which makes the core code check for a contiguous MSI-X index on allocation. That's simpler than checking it at some other domain callback in architecture code. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20211210221814.662401116@linutronix.de --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index b3d3b0bf59fe..d206239e6fa8 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -362,6 +362,8 @@ enum { MSI_FLAG_LEVEL_CAPABLE = (1 << 6), /* Populate sysfs on alloc() and destroy it on free() */ MSI_FLAG_DEV_SYSFS = (1 << 7), + /* MSI-X entries must be contiguous */ + MSI_FLAG_MSIX_CONTIGUOUS = (1 << 8), }; int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, -- cgit v1.2.3 From cf15f43acaad31dabb2646cef170a506a1d663eb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:23 +0100 Subject: genirq/msi: Provide interface to retrieve Linux interrupt number This allows drivers to retrieve the Linux interrupt number instead of fiddling with MSI descriptors. msi_get_virq() returns the Linux interrupt number or 0 in case that there is no entry for the given MSI index. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221814.780824745@linutronix.de --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index d206239e6fa8..7593fc383dba 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -153,6 +153,8 @@ struct msi_device_data { int msi_setup_device_data(struct device *dev); +unsigned int msi_get_virq(struct device *dev, unsigned int index); + /* Helpers to hide struct msi_desc implementation details */ #define msi_desc_to_dev(desc) ((desc)->dev) #define dev_to_msi_list(dev) (&(dev)->msi_list) -- cgit v1.2.3 From d86a6d47bcc6b41fe2a4e13313d66a772d00382f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:34 +0100 Subject: bus: fsl-mc: fsl-mc-allocator: Rework MSI handling Storing a pointer to the MSI descriptor just to track the Linux interrupt number is daft. Just store the interrupt number and be done with it. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211210221815.207838579@linutronix.de --- include/linux/fsl/mc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index e026f6c48b49..7b6c42bfb660 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -91,13 +91,13 @@ struct fsl_mc_resource { /** * struct fsl_mc_device_irq - MC object device message-based interrupt - * @msi_desc: pointer to MSI descriptor allocated by fsl_mc_msi_alloc_descs() + * @virq: Linux virtual interrupt number * @mc_dev: MC object device that owns this interrupt * @dev_irq_index: device-relative IRQ index * @resource: MC generic resource associated with the interrupt */ struct fsl_mc_device_irq { - struct msi_desc *msi_desc; + unsigned int virq; struct fsl_mc_device *mc_dev; u8 dev_irq_index; struct fsl_mc_resource resource; -- cgit v1.2.3 From 89e0032ec201f76c86d6e3e6f94574dfb8e39b71 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Dec 2021 23:19:35 +0100 Subject: soc: ti: ti_sci_inta_msi: Get rid of ti_sci_inta_msi_get_virq() Just use the core function msi_get_virq(). Signed-off-by: Thomas Gleixner Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Acked-by: Arnd Bergmann Acked-by: Vinod Koul Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20211210221815.269468319@linutronix.de --- include/linux/soc/ti/ti_sci_inta_msi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/soc/ti/ti_sci_inta_msi.h b/include/linux/soc/ti/ti_sci_inta_msi.h index e3aa8b14612e..25ea78a8ea5c 100644 --- a/include/linux/soc/ti/ti_sci_inta_msi.h +++ b/include/linux/soc/ti/ti_sci_inta_msi.h @@ -18,6 +18,5 @@ struct irq_domain struct irq_domain *parent); int ti_sci_inta_msi_domain_alloc_irqs(struct device *dev, struct ti_sci_resource *res); -unsigned int ti_sci_inta_msi_get_virq(struct device *dev, u32 index); void ti_sci_inta_msi_domain_free_irqs(struct device *dev); #endif /* __INCLUDE_LINUX_IRQCHIP_TI_SCI_INTA_H */ -- cgit v1.2.3 From 125282cd4f33ecd53a24ae4807409da0e5e90fd4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:04 +0100 Subject: genirq/msi: Move descriptor list to struct msi_device_data It's only required when MSI is in use. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210747.650487479@linutronix.de --- include/linux/device.h | 4 ---- include/linux/msi.h | 4 +++- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index f0033cd93631..93459724dcde 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -423,7 +423,6 @@ struct dev_msi_info { * @pins: For device pin management. * See Documentation/driver-api/pin-control.rst for details. * @msi: MSI related data - * @msi_list: Hosts MSI descriptors * @numa_node: NUMA node this device is close to. * @dma_ops: DMA mapping operations for this device. * @dma_mask: Dma mask (if dma'ble device). @@ -519,9 +518,6 @@ struct device { struct dev_pin_info *pins; #endif struct dev_msi_info msi; -#ifdef CONFIG_GENERIC_MSI_IRQ - struct list_head msi_list; -#endif #ifdef CONFIG_DMA_OPS const struct dma_map_ops *dma_ops; #endif diff --git a/include/linux/msi.h b/include/linux/msi.h index 7593fc383dba..4223e47103ed 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -144,11 +144,13 @@ struct msi_desc { * @properties: MSI properties which are interesting to drivers * @attrs: Pointer to the sysfs attribute group * @platform_data: Platform-MSI specific data + * @list: List of MSI descriptors associated to the device */ struct msi_device_data { unsigned long properties; const struct attribute_group **attrs; struct platform_msi_priv_data *platform_data; + struct list_head list; }; int msi_setup_device_data(struct device *dev); @@ -157,7 +159,7 @@ unsigned int msi_get_virq(struct device *dev, unsigned int index); /* Helpers to hide struct msi_desc implementation details */ #define msi_desc_to_dev(desc) ((desc)->dev) -#define dev_to_msi_list(dev) (&(dev)->msi_list) +#define dev_to_msi_list(dev) (&(dev)->msi.data->list) #define first_msi_entry(dev) \ list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list) #define for_each_msi_entry(desc, dev) \ -- cgit v1.2.3 From b5f687f97d1e112493fe0447a1fb09fbd93c334b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:05 +0100 Subject: genirq/msi: Add mutex for MSI list protection For upcoming runtime extensions of MSI-X interrupts it's required to protect the MSI descriptor list. Add a mutex to struct msi_device_data and provide lock/unlock functions. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210747.708877269@linutronix.de --- include/linux/msi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 4223e47103ed..2cf6c530588d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -3,6 +3,7 @@ #define LINUX_MSI_H #include +#include #include #include @@ -145,17 +146,21 @@ struct msi_desc { * @attrs: Pointer to the sysfs attribute group * @platform_data: Platform-MSI specific data * @list: List of MSI descriptors associated to the device + * @mutex: Mutex protecting the MSI list */ struct msi_device_data { unsigned long properties; const struct attribute_group **attrs; struct platform_msi_priv_data *platform_data; struct list_head list; + struct mutex mutex; }; int msi_setup_device_data(struct device *dev); unsigned int msi_get_virq(struct device *dev, unsigned int index); +void msi_lock_descs(struct device *dev); +void msi_unlock_descs(struct device *dev); /* Helpers to hide struct msi_desc implementation details */ #define msi_desc_to_dev(desc) ((desc)->dev) -- cgit v1.2.3 From 0f62d941acf9ac3b6025692ce649b1f282b89e7f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:07 +0100 Subject: genirq/msi: Provide msi_domain_alloc/free_irqs_descs_locked() Usage sites which do allocations of the MSI descriptors before invoking msi_domain_alloc_irqs() require to lock the MSI decriptors accross the operation. Provide entry points which can be called with the MSI mutex held and lock the mutex in the existing entry points. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210747.765371053@linutronix.de --- include/linux/msi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 2cf6c530588d..69c588efe85b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -383,9 +383,12 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct irq_domain *parent); int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); +int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, + int nvec); int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); +void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev); void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); -- cgit v1.2.3 From 1046f71d7268b1680d7b044dea83c664403f6302 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:08 +0100 Subject: genirq/msi: Provide a set of advanced MSI accessors and iterators In preparation for dynamic handling of MSI-X interrupts provide a new set of MSI descriptor accessor functions and iterators. They are benefitial per se as they allow to cleanup quite some code in various MSI domain implementations. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210747.818635078@linutronix.de --- include/linux/msi.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 69c588efe85b..703221f7e9ea 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -140,6 +140,18 @@ struct msi_desc { struct pci_msi_desc pci; }; +/* + * Filter values for the MSI descriptor iterators and accessor functions. + */ +enum msi_desc_filter { + /* All descriptors */ + MSI_DESC_ALL, + /* Descriptors which have no interrupt associated */ + MSI_DESC_NOTASSOCIATED, + /* Descriptors which have an interrupt associated */ + MSI_DESC_ASSOCIATED, +}; + /** * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers @@ -147,6 +159,7 @@ struct msi_desc { * @platform_data: Platform-MSI specific data * @list: List of MSI descriptors associated to the device * @mutex: Mutex protecting the MSI list + * @__next: Cached pointer to the next entry for iterators */ struct msi_device_data { unsigned long properties; @@ -154,6 +167,7 @@ struct msi_device_data { struct platform_msi_priv_data *platform_data; struct list_head list; struct mutex mutex; + struct msi_desc *__next; }; int msi_setup_device_data(struct device *dev); @@ -162,6 +176,25 @@ unsigned int msi_get_virq(struct device *dev, unsigned int index); void msi_lock_descs(struct device *dev); void msi_unlock_descs(struct device *dev); +struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter); +struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); + +/** + * msi_for_each_desc - Iterate the MSI descriptors + * + * @desc: struct msi_desc pointer used as iterator + * @dev: struct device pointer - device to iterate + * @filter: Filter for descriptor selection + * + * Notes: + * - The loop must be protected with a msi_lock_descs()/msi_unlock_descs() + * pair. + * - It is safe to remove a retrieved MSI descriptor in the loop. + */ +#define msi_for_each_desc(desc, dev, filter) \ + for ((desc) = msi_first_desc((dev), (filter)); (desc); \ + (desc) = msi_next_desc((dev), (filter))) + /* Helpers to hide struct msi_desc implementation details */ #define msi_desc_to_dev(desc) ((desc)->dev) #define dev_to_msi_list(dev) (&(dev)->msi.data->list) -- cgit v1.2.3 From 602905253607ba892336f7bba8bb45b5be819d87 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:10 +0100 Subject: genirq/msi: Provide msi_alloc_msi_desc() and a simple allocator Provide msi_alloc_msi_desc() which takes a template MSI descriptor for initializing a newly allocated descriptor. This allows to simplify various usage sites of alloc_msi_entry() and moves the storage handling into the core code. For simple cases where only a linear vector space is required provide msi_add_simple_msi_descs() which just allocates a linear range of MSI descriptors and fills msi_desc::msi_index accordingly. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210747.873833567@linutronix.de --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 703221f7e9ea..bbb8c1e2c18b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -247,6 +247,8 @@ static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) } #endif /* CONFIG_PCI_MSI */ +int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc); + struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, const struct irq_affinity_desc *affinity); void free_msi_entry(struct msi_desc *entry); -- cgit v1.2.3 From 645474e2cee450131e8b8d8a69a5d9bbabd43f3f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:12 +0100 Subject: genirq/msi: Provide domain flags to allocate/free MSI descriptors automatically Provide domain info flags which tell the core to allocate simple descriptors or to free descriptors when the interrupts are freed and implement the required functionality. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210747.928198636@linutronix.de --- include/linux/msi.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index bbb8c1e2c18b..17e47ab8d57a 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -105,6 +105,8 @@ struct pci_msi_desc { }; }; +#define MSI_MAX_INDEX ((unsigned int)USHRT_MAX) + /** * struct msi_desc - Descriptor structure for MSI based interrupts * @list: List head for management @@ -248,6 +250,17 @@ static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) #endif /* CONFIG_PCI_MSI */ int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc); +void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, + unsigned int first_index, unsigned int last_index); + +/** + * msi_free_msi_descs - Free MSI descriptors of a device + * @dev: Device to free the descriptors + */ +static inline void msi_free_msi_descs(struct device *dev) +{ + msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX); +} struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, const struct irq_affinity_desc *affinity); @@ -408,6 +421,10 @@ enum { MSI_FLAG_DEV_SYSFS = (1 << 7), /* MSI-X entries must be contiguous */ MSI_FLAG_MSIX_CONTIGUOUS = (1 << 8), + /* Allocate simple MSI descriptors */ + MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 9), + /* Free MSI descriptors */ + MSI_FLAG_FREE_MSI_DESCS = (1 << 10), }; int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, -- cgit v1.2.3 From 7ad321a5eadb52b4af1c577dda51783e08235ea7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:37 +0100 Subject: soc: ti: ti_sci_inta_msi: Remove ti_sci_inta_msi_domain_free_irqs() The function has no users and is pointless now that the core frees the MSI descriptors, which means potential users can just use msi_domain_free_irqs(). Signed-off-by: Thomas Gleixner Tested-by: Nishanth Menon Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210748.793119155@linutronix.de --- include/linux/soc/ti/ti_sci_inta_msi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/soc/ti/ti_sci_inta_msi.h b/include/linux/soc/ti/ti_sci_inta_msi.h index 25ea78a8ea5c..4dba2f2aff6f 100644 --- a/include/linux/soc/ti/ti_sci_inta_msi.h +++ b/include/linux/soc/ti/ti_sci_inta_msi.h @@ -18,5 +18,4 @@ struct irq_domain struct irq_domain *parent); int ti_sci_inta_msi_domain_alloc_irqs(struct device *dev, struct ti_sci_resource *res); -void ti_sci_inta_msi_domain_free_irqs(struct device *dev); #endif /* __INCLUDE_LINUX_IRQCHIP_TI_SCI_INTA_H */ -- cgit v1.2.3 From ef8dd01538ea2553ab101ddce6a85a321406d9c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:44 +0100 Subject: genirq/msi: Make interrupt allocation less convoluted There is no real reason to do several loops over the MSI descriptors instead of just doing one loop. In case of an error everything is undone anyway so it does not matter whether it's a partial or a full rollback. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210749.010234767@linutronix.de --- include/linux/msi.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 17e47ab8d57a..e8dd0be17e89 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -206,12 +206,6 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); list_for_each_entry((desc), dev_to_msi_list((dev)), list) #define for_each_msi_entry_safe(desc, tmp, dev) \ list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list) -#define for_each_msi_vector(desc, __irq, dev) \ - for_each_msi_entry((desc), (dev)) \ - if ((desc)->irq) \ - for (__irq = (desc)->irq; \ - __irq < ((desc)->irq + (desc)->nvec_used); \ - __irq++) #ifdef CONFIG_IRQ_MSI_IOMMU static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) -- cgit v1.2.3 From cc9a246dbf6bdef56d9eee296a1db52dd0607976 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:47 +0100 Subject: genirq/msi: Mop up old interfaces Get rid of the old iterators, alloc/free functions and adjust the core code accordingly. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210749.117395027@linutronix.de --- include/linux/msi.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index e8dd0be17e89..b54010ba7b0d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -197,15 +197,7 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); for ((desc) = msi_first_desc((dev), (filter)); (desc); \ (desc) = msi_next_desc((dev), (filter))) -/* Helpers to hide struct msi_desc implementation details */ #define msi_desc_to_dev(desc) ((desc)->dev) -#define dev_to_msi_list(dev) (&(dev)->msi.data->list) -#define first_msi_entry(dev) \ - list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list) -#define for_each_msi_entry(desc, dev) \ - list_for_each_entry((desc), dev_to_msi_list((dev)), list) -#define for_each_msi_entry_safe(desc, tmp, dev) \ - list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list) #ifdef CONFIG_IRQ_MSI_IOMMU static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) @@ -231,10 +223,6 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, #endif #ifdef CONFIG_PCI_MSI -#define first_pci_msi_entry(pdev) first_msi_entry(&(pdev)->dev) -#define for_each_pci_msi_entry(desc, pdev) \ - for_each_msi_entry((desc), &(pdev)->dev) - struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); #else /* CONFIG_PCI_MSI */ @@ -256,9 +244,6 @@ static inline void msi_free_msi_descs(struct device *dev) msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX); } -struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, - const struct irq_affinity_desc *affinity); -void free_msi_entry(struct msi_desc *entry); void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); -- cgit v1.2.3 From ef3350c53d2aac65cf1c4ecc968bbb1de5f421ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:49 +0100 Subject: genirq/msi: Add abuse prevention comment to msi header Hope dies last. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210749.170847844@linutronix.de --- include/linux/msi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index b54010ba7b0d..70cc6a555a8e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -2,6 +2,20 @@ #ifndef LINUX_MSI_H #define LINUX_MSI_H +/* + * This header file contains MSI data structures and functions which are + * only relevant for: + * - Interrupt core code + * - PCI/MSI core code + * - MSI interrupt domain implementations + * - IOMMU, low level VFIO, NTB and other justified exceptions + * dealing with low level MSI details. + * + * Regular device drivers have no business with any of these functions and + * especially storing MSI descriptor pointers in random code is considered + * abuse. The only function which is relevant for drivers is msi_get_virq(). + */ + #include #include #include -- cgit v1.2.3 From bf5e758f02fc739589dcc6a3395c3a3eb77b5c90 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:50 +0100 Subject: genirq/msi: Simplify sysfs handling The sysfs handling for MSI is a convoluted maze and it is in the way of supporting dynamic expansion of the MSI-X vectors because it only supports a one off bulk population/free of the sysfs entries. Change it to do: 1) Creating an empty sysfs attribute group when msi_device_data is allocated 2) Populate the entries when the MSI descriptor is initialized 3) Free the entries when a MSI descriptor is detached from a Linux interrupt. 4) Provide functions for the legacy non-irqdomain fallback code to do a bulk population/free. This code won't support dynamic expansion. This makes the code simpler and reduces the number of allocations as the empty attribute group can be shared. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210749.224917330@linutronix.de --- include/linux/msi.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 70cc6a555a8e..1a00367d2cfa 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -71,7 +71,7 @@ struct irq_data; struct msi_desc; struct pci_dev; struct platform_msi_priv_data; -struct attribute_group; +struct device_attribute; void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); #ifdef CONFIG_GENERIC_MSI_IRQ @@ -129,6 +129,7 @@ struct pci_msi_desc { * @dev: Pointer to the device which uses this descriptor * @msg: The last set MSI message cached for reuse * @affinity: Optional pointer to a cpu affinity mask for this descriptor + * @sysfs_attr: Pointer to sysfs device attribute * * @write_msi_msg: Callback that may be called when the MSI message * address or data changes @@ -148,6 +149,9 @@ struct msi_desc { #ifdef CONFIG_IRQ_MSI_IOMMU const void *iommu_cookie; #endif +#ifdef CONFIG_SYSFS + struct device_attribute *sysfs_attrs; +#endif void (*write_msi_msg)(struct msi_desc *entry, void *data); void *write_msi_msg_data; @@ -171,7 +175,6 @@ enum msi_desc_filter { /** * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers - * @attrs: Pointer to the sysfs attribute group * @platform_data: Platform-MSI specific data * @list: List of MSI descriptors associated to the device * @mutex: Mutex protecting the MSI list @@ -179,7 +182,6 @@ enum msi_desc_filter { */ struct msi_device_data { unsigned long properties; - const struct attribute_group **attrs; struct platform_msi_priv_data *platform_data; struct list_head list; struct mutex mutex; @@ -264,14 +266,6 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void pci_msi_mask_irq(struct irq_data *data); void pci_msi_unmask_irq(struct irq_data *data); -#ifdef CONFIG_SYSFS -int msi_device_populate_sysfs(struct device *dev); -void msi_device_destroy_sysfs(struct device *dev); -#else /* CONFIG_SYSFS */ -static inline int msi_device_populate_sysfs(struct device *dev) { return 0; } -static inline void msi_device_destroy_sysfs(struct device *dev) { } -#endif /* !CONFIG_SYSFS */ - /* * The arch hooks to setup up msi irqs. Default functions are implemented * as weak symbols so that they /can/ be overriden by architecture specific @@ -285,6 +279,13 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); void arch_teardown_msi_irq(unsigned int irq); int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void arch_teardown_msi_irqs(struct pci_dev *dev); +#ifdef CONFIG_SYSFS +int msi_device_populate_sysfs(struct device *dev); +void msi_device_destroy_sysfs(struct device *dev); +#else /* CONFIG_SYSFS */ +static inline int msi_device_populate_sysfs(struct device *dev) { return 0; } +static inline void msi_device_destroy_sysfs(struct device *dev) { } +#endif /* !CONFIG_SYSFS */ #endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */ /* -- cgit v1.2.3 From cd6cf06590b9792340dceaa285138777f3cc4d90 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Dec 2021 23:51:52 +0100 Subject: genirq/msi: Convert storage to xarray The current linked list storage for MSI descriptors is suboptimal in several ways: 1) Looking up a MSI desciptor requires a O(n) list walk in the worst case 2) The upcoming support of runtime expansion of MSI-X vectors would need to do a full list walk to figure out whether a particular index is already associated. 3) Runtime expansion of sparse allocations is even more complex as the current implementation assumes an ordered list (increasing MSI index). Use an xarray which solves all of the above problems nicely. Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Nishanth Menon Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20211206210749.280627070@linutronix.de --- include/linux/msi.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 1a00367d2cfa..fc918a658d48 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -17,6 +17,7 @@ */ #include +#include #include #include #include @@ -123,7 +124,6 @@ struct pci_msi_desc { /** * struct msi_desc - Descriptor structure for MSI based interrupts - * @list: List head for management * @irq: The base interrupt number * @nvec_used: The number of vectors used * @dev: Pointer to the device which uses this descriptor @@ -140,7 +140,6 @@ struct pci_msi_desc { */ struct msi_desc { /* Shared device/bus type independent data */ - struct list_head list; unsigned int irq; unsigned int nvec_used; struct device *dev; @@ -176,16 +175,16 @@ enum msi_desc_filter { * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers * @platform_data: Platform-MSI specific data - * @list: List of MSI descriptors associated to the device - * @mutex: Mutex protecting the MSI list - * @__next: Cached pointer to the next entry for iterators + * @mutex: Mutex protecting the MSI descriptor store + * @__store: Xarray for storing MSI descriptor pointers + * @__iter_idx: Index to search the next entry for iterators */ struct msi_device_data { unsigned long properties; struct platform_msi_priv_data *platform_data; - struct list_head list; struct mutex mutex; - struct msi_desc *__next; + struct xarray __store; + unsigned long __iter_idx; }; int msi_setup_device_data(struct device *dev); -- cgit v1.2.3 From 60f20d84dc813f1342771a3e4f06d89da26dc412 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 18 Nov 2021 12:12:10 -0600 Subject: of/fdt: Rework early_init_dt_scan_chosen() to call directly Use of the of_scan_flat_dt() function predates libfdt and is discouraged as libfdt provides a nicer set of APIs. Rework early_init_dt_scan_chosen() to be called directly and use libfdt. Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Frank Rowand Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Rob Herring Reviewed-by: Frank Rowand Link: https://lore.kernel.org/r/20211118181213.1433346-2-robh@kernel.org --- include/linux/of_fdt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index cf48983d3c86..654722235df6 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -58,8 +58,7 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long of_get_flat_dt_root(void); extern uint32_t of_get_flat_dt_phandle(unsigned long node); -extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, - int depth, void *data); +extern int early_init_dt_scan_chosen(char *cmdline); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); extern int early_init_dt_scan_chosen_stdout(void); -- cgit v1.2.3 From d665881d2171b62ca1ea23be89be6f2a8a330bb2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 18 Nov 2021 12:12:11 -0600 Subject: of/fdt: Rework early_init_dt_scan_root() to call directly Use of the of_scan_flat_dt() function predates libfdt and is discouraged as libfdt provides a nicer set of APIs. Rework early_init_dt_scan_root() to be called directly and use libfdt. Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Frank Rowand Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Rob Herring Reviewed-by: Frank Rowand Link: https://lore.kernel.org/r/20211118181213.1433346-3-robh@kernel.org --- include/linux/of_fdt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 654722235df6..df3d31926c3c 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -68,8 +68,7 @@ extern void early_init_dt_add_memory_arch(u64 base, u64 size); extern u64 dt_mem_next_cell(int s, const __be32 **cellp); /* Early flat tree scan hooks */ -extern int early_init_dt_scan_root(unsigned long node, const char *uname, - int depth, void *data); +extern int early_init_dt_scan_root(void); extern bool early_init_dt_scan(void *params); extern bool early_init_dt_verify(void *params); -- cgit v1.2.3 From 1f012283e9360fb4007308f04cfaeb205e34b684 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 15 Dec 2021 09:01:02 -0600 Subject: of/fdt: Rework early_init_dt_scan_memory() to call directly Use of the of_scan_flat_dt() function predates libfdt and is discouraged as libfdt provides a nicer set of APIs. Rework early_init_dt_scan_memory() to be called directly and use libfdt. Cc: John Crispin Cc: Thomas Bogendoerfer Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Frank Rowand Cc: linux-mips@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Reviewed-by: Frank Rowand Signed-off-by: Rob Herring Tested-by: Michael Ellerman Link: https://lore.kernel.org/r/20211215150102.1303588-1-robh@kernel.org --- include/linux/of_fdt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index df3d31926c3c..914739f3c192 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -59,8 +59,7 @@ extern unsigned long of_get_flat_dt_root(void); extern uint32_t of_get_flat_dt_phandle(unsigned long node); extern int early_init_dt_scan_chosen(char *cmdline); -extern int early_init_dt_scan_memory(unsigned long node, const char *uname, - int depth, void *data); +extern int early_init_dt_scan_memory(void); extern int early_init_dt_scan_chosen_stdout(void); extern void early_init_fdt_scan_reserved_mem(void); extern void early_init_fdt_reserve_self(void); -- cgit v1.2.3 From f7ea534a0920dbaf71a8003936e178e14ec9271d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 15 Dec 2021 18:55:36 -0800 Subject: add includes masked by cgroup -> bpf dependency cgroup pulls in BPF which pulls in a lot of includes. We're about to break that chain so fix those who were depending on it. Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211216025538.1649516-2-kuba@kernel.org --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0dcfd265beed..4a021149eaf0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -611,6 +611,7 @@ struct swevent_hlist { #define PERF_ATTACH_SCHED_CB 0x20 #define PERF_ATTACH_CHILD 0x40 +struct bpf_prog; struct perf_cgroup; struct perf_buffer; -- cgit v1.2.3 From fd1740b6abac39f68ce12e201697f106e0f1d519 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 15 Dec 2021 18:55:38 -0800 Subject: bpf: Remove the cgroup -> bpf header dependecy Remove the dependency from cgroup-defs.h to bpf-cgroup.h and bpf.h. This reduces the incremental build size of x86 allmodconfig after bpf.h was touched from ~17k objects rebuilt to ~5k objects. bpf.h is 2.2kLoC and is modified relatively often. We need a new header with just the definition of struct cgroup_bpf and enum cgroup_bpf_attach_type, this is akin to cgroup-defs.h. Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov Acked-by: Tejun Heo Link: https://lore.kernel.org/bpf/20211216025538.1649516-4-kuba@kernel.org --- include/linux/bpf-cgroup-defs.h | 70 +++++++++++++++++++++++++++++++++++++++++ include/linux/bpf-cgroup.h | 57 +-------------------------------- include/linux/cgroup-defs.h | 2 +- 3 files changed, 72 insertions(+), 57 deletions(-) create mode 100644 include/linux/bpf-cgroup-defs.h (limited to 'include') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h new file mode 100644 index 000000000000..695d1224a71b --- /dev/null +++ b/include/linux/bpf-cgroup-defs.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BPF_CGROUP_DEFS_H +#define _BPF_CGROUP_DEFS_H + +#ifdef CONFIG_CGROUP_BPF + +#include +#include +#include + +struct bpf_prog_array; + +enum cgroup_bpf_attach_type { + CGROUP_BPF_ATTACH_TYPE_INVALID = -1, + CGROUP_INET_INGRESS = 0, + CGROUP_INET_EGRESS, + CGROUP_INET_SOCK_CREATE, + CGROUP_SOCK_OPS, + CGROUP_DEVICE, + CGROUP_INET4_BIND, + CGROUP_INET6_BIND, + CGROUP_INET4_CONNECT, + CGROUP_INET6_CONNECT, + CGROUP_INET4_POST_BIND, + CGROUP_INET6_POST_BIND, + CGROUP_UDP4_SENDMSG, + CGROUP_UDP6_SENDMSG, + CGROUP_SYSCTL, + CGROUP_UDP4_RECVMSG, + CGROUP_UDP6_RECVMSG, + CGROUP_GETSOCKOPT, + CGROUP_SETSOCKOPT, + CGROUP_INET4_GETPEERNAME, + CGROUP_INET6_GETPEERNAME, + CGROUP_INET4_GETSOCKNAME, + CGROUP_INET6_GETSOCKNAME, + CGROUP_INET_SOCK_RELEASE, + MAX_CGROUP_BPF_ATTACH_TYPE +}; + +struct cgroup_bpf { + /* array of effective progs in this cgroup */ + struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE]; + + /* attached progs to this cgroup and attach flags + * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will + * have either zero or one element + * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS + */ + struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; + u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; + + /* list of cgroup shared storages */ + struct list_head storages; + + /* temp storage for effective prog array used by prog_attach/detach */ + struct bpf_prog_array *inactive; + + /* reference counter used to detach bpf programs after cgroup removal */ + struct percpu_ref refcnt; + + /* cgroup_bpf is released using a work queue */ + struct work_struct release_work; +}; + +#else /* CONFIG_CGROUP_BPF */ +struct cgroup_bpf {}; +#endif /* CONFIG_CGROUP_BPF */ + +#endif diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 11820a430d6c..b525d8cdc25b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -3,10 +3,10 @@ #define _BPF_CGROUP_H #include +#include #include #include #include -#include #include #include @@ -23,33 +23,6 @@ struct ctl_table_header; struct task_struct; #ifdef CONFIG_CGROUP_BPF -enum cgroup_bpf_attach_type { - CGROUP_BPF_ATTACH_TYPE_INVALID = -1, - CGROUP_INET_INGRESS = 0, - CGROUP_INET_EGRESS, - CGROUP_INET_SOCK_CREATE, - CGROUP_SOCK_OPS, - CGROUP_DEVICE, - CGROUP_INET4_BIND, - CGROUP_INET6_BIND, - CGROUP_INET4_CONNECT, - CGROUP_INET6_CONNECT, - CGROUP_INET4_POST_BIND, - CGROUP_INET6_POST_BIND, - CGROUP_UDP4_SENDMSG, - CGROUP_UDP6_SENDMSG, - CGROUP_SYSCTL, - CGROUP_UDP4_RECVMSG, - CGROUP_UDP6_RECVMSG, - CGROUP_GETSOCKOPT, - CGROUP_SETSOCKOPT, - CGROUP_INET4_GETPEERNAME, - CGROUP_INET6_GETPEERNAME, - CGROUP_INET4_GETSOCKNAME, - CGROUP_INET6_GETSOCKNAME, - CGROUP_INET_SOCK_RELEASE, - MAX_CGROUP_BPF_ATTACH_TYPE -}; #define CGROUP_ATYPE(type) \ case BPF_##type: return type @@ -127,33 +100,6 @@ struct bpf_prog_list { struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; }; -struct bpf_prog_array; - -struct cgroup_bpf { - /* array of effective progs in this cgroup */ - struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE]; - - /* attached progs to this cgroup and attach flags - * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will - * have either zero or one element - * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS - */ - struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; - u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; - - /* list of cgroup shared storages */ - struct list_head storages; - - /* temp storage for effective prog array used by prog_attach/detach */ - struct bpf_prog_array *inactive; - - /* reference counter used to detach bpf programs after cgroup removal */ - struct percpu_ref refcnt; - - /* cgroup_bpf is released using a work queue */ - struct work_struct release_work; -}; - int cgroup_bpf_inherit(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp); @@ -451,7 +397,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); #else -struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index db2e147e069f..411684c80cf3 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_CGROUPS -- cgit v1.2.3 From 0f55f9ed21f96630c6ec96805d42f92c0b458b37 Mon Sep 17 00:00:00 2001 From: Christy Lee Date: Thu, 16 Dec 2021 13:33:56 -0800 Subject: bpf: Only print scratched registers and stack slots to verifier logs. When printing verifier state for any log level, print full verifier state only on function calls or on errors. Otherwise, only print the registers and stack slots that were accessed. Log size differences: verif_scale_loop6 before: 234566564 verif_scale_loop6 after: 72143943 69% size reduction kfree_skb before: 166406 kfree_skb after: 55386 69% size reduction Before: 156: (61) r0 = *(u32 *)(r1 +0) 157: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=ctx(id=0,off=0,imm=0) R2_w=invP0 R10=fp0 fp-8_w=00000000 fp-16_w=00\ 000000 fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000 fp-56_w=00000000 fp-64_w=00000000 fp-72_w=00000000 fp-80_w=00000\ 000 fp-88_w=00000000 fp-96_w=00000000 fp-104_w=00000000 fp-112_w=00000000 fp-120_w=00000000 fp-128_w=00000000 fp-136_w=00000000 fp-144_w=00\ 000000 fp-152_w=00000000 fp-160_w=00000000 fp-168_w=00000000 fp-176_w=00000000 fp-184_w=00000000 fp-192_w=00000000 fp-200_w=00000000 fp-208\ _w=00000000 fp-216_w=00000000 fp-224_w=00000000 fp-232_w=00000000 fp-240_w=00000000 fp-248_w=00000000 fp-256_w=00000000 fp-264_w=00000000 f\ p-272_w=00000000 fp-280_w=00000000 fp-288_w=00000000 fp-296_w=00000000 fp-304_w=00000000 fp-312_w=00000000 fp-320_w=00000000 fp-328_w=00000\ 000 fp-336_w=00000000 fp-344_w=00000000 fp-352_w=00000000 fp-360_w=00000000 fp-368_w=00000000 fp-376_w=00000000 fp-384_w=00000000 fp-392_w=\ 00000000 fp-400_w=00000000 fp-408_w=00000000 fp-416_w=00000000 fp-424_w=00000000 fp-432_w=00000000 fp-440_w=00000000 fp-448_w=00000000 ; return skb->len; 157: (95) exit Func#4 is safe for any args that match its prototype Validating get_constant() func#5... 158: R1=invP(id=0) R10=fp0 ; int get_constant(long val) 158: (bf) r0 = r1 159: R0_w=invP(id=1) R1=invP(id=1) R10=fp0 ; return val - 122; 159: (04) w0 += -122 160: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=invP(id=1) R10=fp0 ; return val - 122; 160: (95) exit Func#5 is safe for any args that match its prototype Validating get_skb_ifindex() func#6... 161: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R3=invP(id=0) R10=fp0 ; int get_skb_ifindex(int val, struct __sk_buff *skb, int var) 161: (bc) w0 = w3 162: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R3=invP(id=0) R10=fp0 After: 156: (61) r0 = *(u32 *)(r1 +0) 157: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=ctx(id=0,off=0,imm=0) ; return skb->len; 157: (95) exit Func#4 is safe for any args that match its prototype Validating get_constant() func#5... 158: R1=invP(id=0) R10=fp0 ; int get_constant(long val) 158: (bf) r0 = r1 159: R0_w=invP(id=1) R1=invP(id=1) ; return val - 122; 159: (04) w0 += -122 160: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) ; return val - 122; 160: (95) exit Func#5 is safe for any args that match its prototype Validating get_skb_ifindex() func#6... 161: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R3=invP(id=0) R10=fp0 ; int get_skb_ifindex(int val, struct __sk_buff *skb, int var) 161: (bc) w0 = w3 162: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R3=invP(id=0) Signed-off-by: Christy Lee Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211216213358.3374427-2-christylee@fb.com --- include/linux/bpf_verifier.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 182b16a91084..c66f238c538d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -474,6 +474,13 @@ struct bpf_verifier_env { /* longest register parentage chain walked for liveness marking */ u32 longest_mark_read_walk; bpfptr_t fd_array; + + /* bit mask to keep track of whether a register has been accessed + * since the last time the function state was printed + */ + u32 scratched_regs; + /* Same as scratched_regs but for stack slots */ + u64 scratched_stack_slots; }; __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, -- cgit v1.2.3 From b77beaaee1be62a4623dd7142868c2e1180d8288 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 14 Dec 2021 18:46:20 -0600 Subject: dt-bindings: power: imx8mn: add defines for DISP blk-ctrl domains This adds the defines for the power domains provided by the DISP blk-ctrl. Signed-off-by: Adam Ford Acked-by: Rob Herring Reviewed-by: Lucas Stach Signed-off-by: Shawn Guo --- include/dt-bindings/power/imx8mn-power.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/power/imx8mn-power.h b/include/dt-bindings/power/imx8mn-power.h index 102ee85a9b62..eedd0e581939 100644 --- a/include/dt-bindings/power/imx8mn-power.h +++ b/include/dt-bindings/power/imx8mn-power.h @@ -12,4 +12,9 @@ #define IMX8MN_POWER_DOMAIN_DISPMIX 3 #define IMX8MN_POWER_DOMAIN_MIPI 4 +#define IMX8MN_DISPBLK_PD_MIPI_DSI 0 +#define IMX8MN_DISPBLK_PD_MIPI_CSI 1 +#define IMX8MN_DISPBLK_PD_LCDIF 2 +#define IMX8MN_DISPBLK_PD_ISI 3 + #endif -- cgit v1.2.3 From 2e5766483c8c5cf886b4dc647a1741738dde7d79 Mon Sep 17 00:00:00 2001 From: Christy Lee Date: Thu, 16 Dec 2021 19:42:45 -0800 Subject: bpf: Right align verifier states in verifier logs. Make the verifier logs more readable, print the verifier states on the corresponding instruction line. If the previous line was not a bpf instruction, then print the verifier states on its own line. Before: Validating test_pkt_access_subprog3() func#3... 86: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R10=fp0 ; int test_pkt_access_subprog3(int val, struct __sk_buff *skb) 86: (bf) r6 = r2 87: R2=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) 87: (bc) w7 = w1 88: R1=invP(id=0) R7_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) ; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); 88: (bf) r1 = r6 89: R1_w=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) 89: (85) call pc+9 Func#4 is global and valid. Skipping. 90: R0_w=invP(id=0) 90: (bc) w8 = w0 91: R0_w=invP(id=0) R8_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) ; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); 91: (b7) r1 = 123 92: R1_w=invP123 92: (85) call pc+65 Func#5 is global and valid. Skipping. 93: R0=invP(id=0) After: 86: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R10=fp0 ; int test_pkt_access_subprog3(int val, struct __sk_buff *skb) 86: (bf) r6 = r2 ; R2=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) 87: (bc) w7 = w1 ; R1=invP(id=0) R7_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) ; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); 88: (bf) r1 = r6 ; R1_w=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) 89: (85) call pc+9 Func#4 is global and valid. Skipping. 90: R0_w=invP(id=0) 90: (bc) w8 = w0 ; R0_w=invP(id=0) R8_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) ; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); 91: (b7) r1 = 123 ; R1_w=invP123 92: (85) call pc+65 Func#5 is global and valid. Skipping. 93: R0=invP(id=0) Signed-off-by: Christy Lee Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c66f238c538d..ee931398f311 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -388,6 +388,8 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) #define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) #define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) #define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */ +#define BPF_LOG_MIN_ALIGNMENT 8U +#define BPF_LOG_ALIGNMENT 40U static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { @@ -481,6 +483,7 @@ struct bpf_verifier_env { u32 scratched_regs; /* Same as scratched_regs but for stack slots */ u64 scratched_stack_slots; + u32 prev_log_len, prev_insn_print_len; }; __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, -- cgit v1.2.3 From bdecfceffeeb9000e78b0f613069f5c06974b347 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Nov 2021 23:21:54 +0100 Subject: ASoC: dai_dma: remove slave_id field This field is no longer set from any driver now, so remove the last references as well. Signed-off-by: Arnd Bergmann Acked-by: Mark Brown Link: https://lore.kernel.org/r/20211122222203.4103644-3-arnd@kernel.org Signed-off-by: Vinod Koul --- include/sound/dmaengine_pcm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index 96666efddb39..38ea046e653c 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -60,7 +60,6 @@ struct dma_chan *snd_dmaengine_pcm_get_chan(struct snd_pcm_substream *substream) * @maxburst: Maximum number of words(note: words, as in units of the * src_addr_width member, not bytes) that can be send to or received from the * DAI in one burst. - * @slave_id: Slave requester id for the DMA channel. * @filter_data: Custom DMA channel filter data, this will usually be used when * requesting the DMA channel. * @chan_name: Custom channel name to use when requesting DMA channel. @@ -74,7 +73,6 @@ struct snd_dmaengine_dai_dma_data { dma_addr_t addr; enum dma_slave_buswidth addr_width; u32 maxburst; - unsigned int slave_id; void *filter_data; const char *chan_name; unsigned int fifo_size; -- cgit v1.2.3 From 03de6b273805b3c552ff158f8688555937375926 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Nov 2021 23:22:00 +0100 Subject: dmaengine: qcom-adm: stop abusing slave_id config The slave_id was previously used to pick one DMA slave instead of another, but this is now done through the DMA descriptors in device tree. For the qcom_adm driver, the configuration is documented in the DT binding to contain a tuple of device identifier and a "crci" field, but the implementation ends up using only a single cell for identifying the slave, with the crci getting passed in nonstandard properties of the device, and passed through the dma driver using the old slave_id field. Part of the problem apparently is that the nand driver ends up using only a single DMA request ID, but requires distinct values for "crci" depending on the type of transfer. Change both the dmaengine driver and the two slave drivers to allow the documented binding to work in addition to the ad-hoc passing of crci values. In order to no longer abuse the slave_id field, pass the data using the "peripheral_config" mechanism instead. Signed-off-by: Arnd Bergmann Acked-by: Mark Brown Link: https://lore.kernel.org/r/20211122222203.4103644-9-arnd@kernel.org Signed-off-by: Vinod Koul --- include/linux/dma/qcom_adm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/dma/qcom_adm.h (limited to 'include') diff --git a/include/linux/dma/qcom_adm.h b/include/linux/dma/qcom_adm.h new file mode 100644 index 000000000000..af20df674f0c --- /dev/null +++ b/include/linux/dma/qcom_adm.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef __LINUX_DMA_QCOM_ADM_H +#define __LINUX_DMA_QCOM_ADM_H + +#include + +struct qcom_adm_peripheral_config { + u32 crci; + u32 mux; +}; + +#endif /* __LINUX_DMA_QCOM_ADM_H */ -- cgit v1.2.3 From 93cdb5b0dc56cc7a8b87a61146495f3bdc93d7ba Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Nov 2021 23:22:01 +0100 Subject: dmaengine: xilinx_dpdma: stop using slave_id field The display driver wants to pass a custom flag to the DMA engine driver, which it started doing by using the slave_id field that was traditionally used for a different purpose. As there is no longer a correct use for the slave_id field, it should really be removed, and the remaining users changed over to something different. The new mechanism for passing nonstandard settings is using the .peripheral_config field, so use that to pass a newly defined structure here, making it clear that this will not work in portable drivers. Reviewed-by: Laurent Pinchart Signed-off-by: Arnd Bergmann Acked-by: Mark Brown Link: https://lore.kernel.org/r/20211122222203.4103644-10-arnd@kernel.org Signed-off-by: Vinod Koul --- include/linux/dma/xilinx_dpdma.h | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 include/linux/dma/xilinx_dpdma.h (limited to 'include') diff --git a/include/linux/dma/xilinx_dpdma.h b/include/linux/dma/xilinx_dpdma.h new file mode 100644 index 000000000000..83a1377f03f8 --- /dev/null +++ b/include/linux/dma/xilinx_dpdma.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __LINUX_DMA_XILINX_DPDMA_H +#define __LINUX_DMA_XILINX_DPDMA_H + +#include + +struct xilinx_dpdma_peripheral_config { + bool video_group; +}; + +#endif /* __LINUX_DMA_XILINX_DPDMA_H */ -- cgit v1.2.3 From 3c219644075795a99271d345efdfa8b256e55161 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Nov 2021 23:22:03 +0100 Subject: dmaengine: remove slave_id config field All references to the slave_id field have been removed, so remove the field as well to prevent new references from creeping in again. Originally this allowed slave DMA drivers to configure which device is accessed with the dmaengine_slave_config() call, but this was inconsistent, as the same information is also passed while requesting a channel, and never changes in practice. In modern kernels, the device is always selected when requesting the channel, so the .slave_id field is no longer useful. Reviewed-by: Laurent Pinchart Signed-off-by: Arnd Bergmann Acked-by: Mark Brown Link: https://lore.kernel.org/r/20211122222203.4103644-12-arnd@kernel.org Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 9000f3ffce8b..0349b35235e6 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -418,9 +418,6 @@ enum dma_slave_buswidth { * @device_fc: Flow Controller Settings. Only valid for slave channels. Fill * with 'true' if peripheral should be flow controller. Direction will be * selected at Runtime. - * @slave_id: Slave requester id. Only valid for slave channels. The dma - * slave peripheral will have unique id as dma requester which need to be - * pass as slave config. * @peripheral_config: peripheral configuration for programming peripheral * for dmaengine transfer * @peripheral_size: peripheral configuration buffer size @@ -448,7 +445,6 @@ struct dma_slave_config { u32 src_port_window_size; u32 dst_port_window_size; bool device_fc; - unsigned int slave_id; void *peripheral_config; size_t peripheral_size; }; -- cgit v1.2.3 From 3d725965f836a7acbd1674e33644bec18373de53 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 7 Dec 2021 15:33:06 -0800 Subject: crypto: ccp - Add SEV_INIT_EX support Add new module parameter to allow users to use SEV_INIT_EX instead of SEV_INIT. This helps users who lock their SPI bus to use the PSP for SEV functionality. The 'init_ex_path' parameter defaults to NULL which means the kernel will use SEV_INIT, if a path is specified SEV_INIT_EX will be used with the data found at the path. On certain PSP commands this file is written to as the PSP updates the NV memory region. Depending on file system initialization this file open may fail during module init but the CCP driver for SEV already has sufficient retries for platform initialization. During normal operation of PSP system and SEV commands if the PSP has not been initialized it is at run time. If the file at 'init_ex_path' does not exist the PSP will not be initialized. The user must create the file prior to use with 32Kb of 0xFFs per spec. Signed-off-by: David Rientjes Co-developed-by: Peter Gonda Signed-off-by: Peter Gonda Reviewed-by: Marc Orr Reported-by: kernel test robot Acked-by: Brijesh Singh Cc: Tom Lendacky Cc: Brijesh Singh Cc: Marc Orr Cc: Joerg Roedel Cc: Herbert Xu Cc: David Rientjes Cc: John Allen Cc: "David S. Miller" Cc: Paolo Bonzini Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- include/linux/psp-sev.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index d48a7192e881..1595088c428b 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -52,6 +52,7 @@ enum sev_cmd { SEV_CMD_DF_FLUSH = 0x00A, SEV_CMD_DOWNLOAD_FIRMWARE = 0x00B, SEV_CMD_GET_ID = 0x00C, + SEV_CMD_INIT_EX = 0x00D, /* Guest commands */ SEV_CMD_DECOMMISSION = 0x020, @@ -102,6 +103,26 @@ struct sev_data_init { u32 tmr_len; /* In */ } __packed; +/** + * struct sev_data_init_ex - INIT_EX command parameters + * + * @length: len of the command buffer read by the PSP + * @flags: processing flags + * @tmr_address: system physical address used for SEV-ES + * @tmr_len: len of tmr_address + * @nv_address: system physical address used for PSP NV storage + * @nv_len: len of nv_address + */ +struct sev_data_init_ex { + u32 length; /* In */ + u32 flags; /* In */ + u64 tmr_address; /* In */ + u32 tmr_len; /* In */ + u32 reserved; /* In */ + u64 nv_address; /* In/Out */ + u32 nv_len; /* In */ +} __packed; + #define SEV_INIT_FLAGS_SEV_ES 0x01 /** -- cgit v1.2.3 From 244d22ffd656bc8e5c49a2cd2fdfeb0e52a9730f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Dec 2021 16:30:09 +0200 Subject: crypto: api - Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Signed-off-by: Herbert Xu --- include/crypto/aead.h | 4 +++- include/crypto/algapi.h | 5 ++++- include/crypto/blake2b.h | 1 - include/crypto/blake2s.h | 2 +- include/crypto/cryptd.h | 3 ++- include/crypto/engine.h | 6 +++++- include/crypto/pcrypt.h | 2 +- include/crypto/scatterwalk.h | 3 ++- include/crypto/skcipher.h | 6 +++++- 9 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 5af914c1ab8e..14db3bee0519 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -8,9 +8,10 @@ #ifndef _CRYPTO_AEAD_H #define _CRYPTO_AEAD_H +#include #include -#include #include +#include /** * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API @@ -73,6 +74,7 @@ */ struct crypto_aead; +struct scatterlist; /** * struct aead_request - AEAD request diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 5f6841c73e5a..f76ec723ceae 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -7,9 +7,11 @@ #ifndef _CRYPTO_ALGAPI_H #define _CRYPTO_ALGAPI_H +#include #include +#include #include -#include +#include /* * Maximum values for blocksize and alignmask, used to allocate @@ -24,6 +26,7 @@ struct crypto_aead; struct crypto_instance; struct module; +struct notifier_block; struct rtattr; struct seq_file; struct sk_buff; diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h index 18875f16f8ca..0c0176285349 100644 --- a/include/crypto/blake2b.h +++ b/include/crypto/blake2b.h @@ -5,7 +5,6 @@ #include #include -#include #include enum blake2b_lengths { diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index bc3fb59442ce..df3c6c2f9553 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -7,8 +7,8 @@ #define _CRYPTO_BLAKE2S_H #include +#include #include -#include #include enum blake2s_lengths { diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index 23169f4d87e6..796d986e58e1 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -13,7 +13,8 @@ #ifndef _CRYPTO_CRYPT_H #define _CRYPTO_CRYPT_H -#include +#include + #include #include #include diff --git a/include/crypto/engine.h b/include/crypto/engine.h index fd4f2fa23f51..ae133e98d813 100644 --- a/include/crypto/engine.h +++ b/include/crypto/engine.h @@ -9,8 +9,10 @@ #include #include -#include #include +#include +#include + #include #include #include @@ -18,6 +20,8 @@ #include #include +struct device; + #define ENGINE_NAME_LEN 30 /* * struct crypto_engine - crypto hardware engine diff --git a/include/crypto/pcrypt.h b/include/crypto/pcrypt.h index b9bc3436196a..234d7cf3cf5e 100644 --- a/include/crypto/pcrypt.h +++ b/include/crypto/pcrypt.h @@ -9,8 +9,8 @@ #ifndef _CRYPTO_PCRYPT_H #define _CRYPTO_PCRYPT_H +#include #include -#include #include struct pcrypt_request { diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 7af08174a721..6407b4b61350 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -12,8 +12,9 @@ #define _CRYPTO_SCATTERWALK_H #include + #include -#include +#include #include static inline void scatterwalk_crypto_chain(struct scatterlist *head, diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index ef0fc9ed4342..39f5b67c3069 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -8,9 +8,13 @@ #ifndef _CRYPTO_SKCIPHER_H #define _CRYPTO_SKCIPHER_H +#include #include -#include #include +#include +#include + +struct scatterlist; /** * struct skcipher_request - Symmetric key cipher request -- cgit v1.2.3 From 9dfa5b6f5efb85efe69fd3b7b0b912004d9547f1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 16 Dec 2021 09:17:03 +0800 Subject: iommu/vt-d: Remove unused macros These macros has no reference in the tree anymore. Cleanup them. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211216011703.763331-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 57cceecbe37f..1b73bab7eeff 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -8,12 +8,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -/* Values for rxwp in fault_cb callback */ -#define SVM_REQ_READ (1<<3) -#define SVM_REQ_WRITE (1<<2) -#define SVM_REQ_EXEC (1<<1) -#define SVM_REQ_PRIV (1<<0) - /* Page Request Queue depth */ #define PRQ_ORDER 2 #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) -- cgit v1.2.3 From b62e3317b68d9c84301940ca8ca9c35a584111b2 Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Thu, 16 Dec 2021 23:19:16 +0800 Subject: net: fix typo in a comment The double 'as' in a comment is repeated, thus it should be removed. Signed-off-by: Xiang wangx Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index be5cb3360b94..6aadcc0ecb5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1937,7 +1937,7 @@ enum netdev_ml_priv_type { * @udp_tunnel_nic: UDP tunnel offload state * @xdp_state: stores info on attached XDP BPF programs * - * @nested_level: Used as as a parameter of spin_lock_nested() of + * @nested_level: Used as a parameter of spin_lock_nested() of * dev->addr_list_lock. * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. -- cgit v1.2.3 From 60ded273e4c047aec364f70195aced71a4082f90 Mon Sep 17 00:00:00 2001 From: Karol Trzcinski Date: Thu, 16 Dec 2021 17:24:22 -0600 Subject: ipc: debug: Add shared memory heap to memory scan Newly added shared heap zones should be taken into account during memory usage scanning. Reviewed-by: Kai Vehmanen Reviewed-by: Liam Girdwood Signed-off-by: Karol Trzcinski Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211216232422.345164-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/debug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/sof/debug.h b/include/sound/sof/debug.h index 3ecb5793789d..38693e3fb514 100644 --- a/include/sound/sof/debug.h +++ b/include/sound/sof/debug.h @@ -19,6 +19,8 @@ enum sof_ipc_dbg_mem_zone { SOF_IPC_MEM_ZONE_SYS_RUNTIME = 1, /**< System-runtime zone */ SOF_IPC_MEM_ZONE_RUNTIME = 2, /**< Runtime zone */ SOF_IPC_MEM_ZONE_BUFFER = 3, /**< Buffer zone */ + SOF_IPC_MEM_ZONE_RUNTIME_SHARED = 4, /**< System runtime zone */ + SOF_IPC_MEM_ZONE_SYS_SHARED = 5, /**< System shared zone */ }; /** ABI3.18 */ -- cgit v1.2.3 From dd61b29207ca4f346fbd9c06bc49f093e3369185 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 7 Dec 2021 10:34:06 +0100 Subject: gpiolib: provide gpiod_remove_hogs() Currently all users of gpiod_add_hogs() call it only once at system init so there never was any need for a mechanism allowing to remove them. Now the upcoming gpio-sim will need to tear down chips with hogged lines so provide a function that allows to remove hogs. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij --- include/linux/gpio/machine.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index d755e529c1e3..2647dd10b541 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -100,6 +100,7 @@ void gpiod_add_lookup_table(struct gpiod_lookup_table *table); void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n); void gpiod_remove_lookup_table(struct gpiod_lookup_table *table); void gpiod_add_hogs(struct gpiod_hog *hogs); +void gpiod_remove_hogs(struct gpiod_hog *hogs); #else /* ! CONFIG_GPIOLIB */ static inline void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {} @@ -108,6 +109,7 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n) {} static inline void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {} static inline void gpiod_add_hogs(struct gpiod_hog *hogs) {} +static inline void gpiod_remove_hogs(struct gpiod_hog *hogs) {} #endif /* CONFIG_GPIOLIB */ #endif /* __LINUX_GPIO_MACHINE_H */ -- cgit v1.2.3 From 990f6756bb64756d2d1033118cded6333b43397d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 13 Dec 2021 11:16:41 +0100 Subject: gpiolib: allow to specify the firmware node in struct gpio_chip Software nodes allow us to represent hierarchies for device components that don't have their struct device representation yet - for instance: banks of GPIOs under a common GPIO expander. The core gpiolib core however doesn't offer any way of passing this information from the drivers. This extends struct gpio_chip with a pointer to fwnode that can be set by the driver and used to pass device properties for child nodes. This is similar to how we handle device-tree sub-nodes with CONFIG_OF_GPIO enabled. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij --- include/linux/gpio/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index a673a359e20b..b0728c8ad90c 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -289,6 +289,7 @@ struct gpio_irq_chip { * number or the name of the SoC IP-block implementing it. * @gpiodev: the internal state holder, opaque struct * @parent: optional parent device providing the GPIOs + * @fwnode: optional fwnode providing this controller's properties * @owner: helps prevent removal of modules exporting active GPIOs * @request: optional hook for chip-specific activation, such as * enabling module power and clock; may sleep @@ -377,6 +378,7 @@ struct gpio_chip { const char *label; struct gpio_device *gpiodev; struct device *parent; + struct fwnode_handle *fwnode; struct module *owner; int (*request)(struct gpio_chip *gc, -- cgit v1.2.3 From a8b10f3d12cfc168a389f3d97e1f762732d4d849 Mon Sep 17 00:00:00 2001 From: Prathamesh Shete Date: Fri, 10 Dec 2021 17:02:03 +0100 Subject: dt-bindings: gpio: Add Tegra234 support Extend the existing Tegra186 GPIO controller device tree bindings with support for the GPIO controller found on Tegra234. The number of pins is slightly different, but the programming model remains the same. Signed-off-by: Prathamesh Shete Reviewed-by: Linus Walleij Reviewed-by: Rob Herring [treding@nvidia.com: update device tree bindings] Signed-off-by: Thierry Reding Signed-off-by: Bartosz Golaszewski --- include/dt-bindings/gpio/tegra234-gpio.h | 63 ++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 include/dt-bindings/gpio/tegra234-gpio.h (limited to 'include') diff --git a/include/dt-bindings/gpio/tegra234-gpio.h b/include/dt-bindings/gpio/tegra234-gpio.h new file mode 100644 index 000000000000..d7a1f2e298e8 --- /dev/null +++ b/include/dt-bindings/gpio/tegra234-gpio.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. */ + +/* + * This header provides constants for binding nvidia,tegra234-gpio*. + * + * The first cell in Tegra's GPIO specifier is the GPIO ID. The macros below + * provide names for this. + * + * The second cell contains standard flag values specified in gpio.h. + */ + +#ifndef _DT_BINDINGS_GPIO_TEGRA234_GPIO_H +#define _DT_BINDINGS_GPIO_TEGRA234_GPIO_H + +#include + +/* GPIOs implemented by main GPIO controller */ +#define TEGRA234_MAIN_GPIO_PORT_A 0 +#define TEGRA234_MAIN_GPIO_PORT_B 1 +#define TEGRA234_MAIN_GPIO_PORT_C 2 +#define TEGRA234_MAIN_GPIO_PORT_D 3 +#define TEGRA234_MAIN_GPIO_PORT_E 4 +#define TEGRA234_MAIN_GPIO_PORT_F 5 +#define TEGRA234_MAIN_GPIO_PORT_G 6 +#define TEGRA234_MAIN_GPIO_PORT_H 7 +#define TEGRA234_MAIN_GPIO_PORT_I 8 +#define TEGRA234_MAIN_GPIO_PORT_J 9 +#define TEGRA234_MAIN_GPIO_PORT_K 10 +#define TEGRA234_MAIN_GPIO_PORT_L 11 +#define TEGRA234_MAIN_GPIO_PORT_M 12 +#define TEGRA234_MAIN_GPIO_PORT_N 13 +#define TEGRA234_MAIN_GPIO_PORT_P 14 +#define TEGRA234_MAIN_GPIO_PORT_Q 15 +#define TEGRA234_MAIN_GPIO_PORT_R 16 +#define TEGRA234_MAIN_GPIO_PORT_S 17 +#define TEGRA234_MAIN_GPIO_PORT_T 18 +#define TEGRA234_MAIN_GPIO_PORT_U 19 +#define TEGRA234_MAIN_GPIO_PORT_V 20 +#define TEGRA234_MAIN_GPIO_PORT_X 21 +#define TEGRA234_MAIN_GPIO_PORT_Y 22 +#define TEGRA234_MAIN_GPIO_PORT_Z 23 +#define TEGRA234_MAIN_GPIO_PORT_AC 24 +#define TEGRA234_MAIN_GPIO_PORT_AD 25 +#define TEGRA234_MAIN_GPIO_PORT_AE 26 +#define TEGRA234_MAIN_GPIO_PORT_AF 27 +#define TEGRA234_MAIN_GPIO_PORT_AG 28 + +#define TEGRA234_MAIN_GPIO(port, offset) \ + ((TEGRA234_MAIN_GPIO_PORT_##port * 8) + offset) + +/* GPIOs implemented by AON GPIO controller */ +#define TEGRA234_AON_GPIO_PORT_AA 0 +#define TEGRA234_AON_GPIO_PORT_BB 1 +#define TEGRA234_AON_GPIO_PORT_CC 2 +#define TEGRA234_AON_GPIO_PORT_DD 3 +#define TEGRA234_AON_GPIO_PORT_EE 4 +#define TEGRA234_AON_GPIO_PORT_GG 5 + +#define TEGRA234_AON_GPIO(port, offset) \ + ((TEGRA234_AON_GPIO_PORT_##port * 8) + offset) + +#endif -- cgit v1.2.3 From 733e417518a69b71061c3bafc2bf106109565eee Mon Sep 17 00:00:00 2001 From: Wasin Thonkaew Date: Wed, 3 Nov 2021 19:42:08 +0000 Subject: asm-generic/error-injection.h: fix a spelling mistake, and a coding style issue Fix a spelling mistake "ganerating" -> "generating". Remove trailing semicolon for a macro ALLOW_ERROR_INJECTION to fix a coding style issue. Signed-off-by: Wasin Thonkaew Acked-by: Masami Hiramatsu Signed-off-by: Arnd Bergmann --- include/asm-generic/error-injection.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h index 7ddd9dc10ce9..fbca56bd9cbc 100644 --- a/include/asm-generic/error-injection.h +++ b/include/asm-generic/error-injection.h @@ -20,7 +20,7 @@ struct pt_regs; #ifdef CONFIG_FUNCTION_ERROR_INJECTION /* - * Whitelist ganerating macro. Specify functions which can be + * Whitelist generating macro. Specify functions which can be * error-injectable using this macro. */ #define ALLOW_ERROR_INJECTION(fname, _etype) \ @@ -29,7 +29,7 @@ static struct error_injection_entry __used \ _eil_addr_##fname = { \ .addr = (unsigned long)fname, \ .etype = EI_ETYPE_##_etype, \ - }; + } void override_function_with_return(struct pt_regs *regs); #else -- cgit v1.2.3 From c06ef740d401d0f4ab188882bf6f8d9cf0f75eaf Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 7 Dec 2021 00:20:59 +0000 Subject: PM: core: Redefine pm_ptr() macro The pm_ptr() macro was previously conditionally defined, according to the value of the CONFIG_PM option. This meant that the pointed structure was either referenced (if CONFIG_PM was set), or never referenced (if CONFIG_PM was not set), causing it to be detected as unused by the compiler. This worked fine, but required the __maybe_unused compiler attribute to be used to every symbol pointed to by a pointer wrapped with pm_ptr(). We can do better. With this change, the pm_ptr() is now defined the same, independently of the value of CONFIG_PM. It now uses the (?:) ternary operator to conditionally resolve to its argument. Since the condition is known at compile time, the compiler will then choose to discard the unused symbols, which won't need to be tagged with __maybe_unused anymore. This pm_ptr() macro is usually used with pointers to dev_pm_ops structures created with SIMPLE_DEV_PM_OPS() or similar macros. These do use a __maybe_unused flag, which is now useless with this change, so it later can be removed. However in the meantime it causes no harm, and all the drivers still compile fine with the new pm_ptr() macro. Signed-off-by: Paul Cercueil Reviewed-by: Jonathan Cameron Reviewed-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 1d8209c09686..b88ac7dcf2a2 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -373,11 +373,7 @@ const struct dev_pm_ops __maybe_unused name = { \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } -#ifdef CONFIG_PM -#define pm_ptr(_ptr) (_ptr) -#else -#define pm_ptr(_ptr) NULL -#endif +#define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr)) /* * PM_EVENT_ messages -- cgit v1.2.3 From 1a3c7bb088266fa2db017be299f91f1c1894c857 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 7 Dec 2021 00:21:00 +0000 Subject: PM: core: Add new *_PM_OPS macros, deprecate old ones This commit introduces the following macros: SYSTEM_SLEEP_PM_OPS() LATE_SYSTEM_SLEEP_PM_OPS() NOIRQ_SYSTEM_SLEEP_PM_OPS() RUNTIME_PM_OPS() These new macros are very similar to their SET_*_PM_OPS() equivalent. They however differ in the fact that the callbacks they set will always be seen as referenced by the compiler. This means that the callback functions don't need to be wrapped with a #ifdef CONFIG_PM guard, or tagged with __maybe_unused, to prevent the compiler from complaining about unused static symbols. The compiler will then simply evaluate at compile time whether or not these symbols are dead code. The callbacks that are only useful with CONFIG_PM_SLEEP is enabled, are now also wrapped with a new pm_sleep_ptr() macro, which is inspired from pm_ptr(). This is needed for drivers that use different callbacks for sleep and runtime PM, to handle the case where CONFIG_PM is set and CONFIG_PM_SLEEP is not. This commit also deprecates the following macros: SIMPLE_DEV_PM_OPS() UNIVERSAL_DEV_PM_OPS() And introduces the following macros: DEFINE_SIMPLE_DEV_PM_OPS() DEFINE_UNIVERSAL_DEV_PM_OPS() These macros are similar to the functions they were created to replace, with the following differences: - They use the new macros introduced above, and as such always reference the provided callback functions. - They are not tagged with __maybe_unused. They are meant to be used with pm_ptr() or pm_sleep_ptr() for DEFINE_UNIVERSAL_DEV_PM_OPS() and DEFINE_SIMPLE_DEV_PM_OPS() respectively. - They declare the symbol static, since every driver seems to do that anyway; and if a non-static use-case is needed an indirection pointer could be used. The point of this change, is to progressively switch from a code model where PM callbacks are all protected behind CONFIG_PM guards, to a code model where the PM callbacks are always seen by the compiler, but discarded if not used. Signed-off-by: Paul Cercueil Reviewed-by: Jonathan Cameron Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 74 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index b88ac7dcf2a2..fc9691cb01b4 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -300,47 +300,59 @@ struct dev_pm_ops { int (*runtime_idle)(struct device *dev); }; +#define SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend = pm_sleep_ptr(suspend_fn), \ + .resume = pm_sleep_ptr(resume_fn), \ + .freeze = pm_sleep_ptr(suspend_fn), \ + .thaw = pm_sleep_ptr(resume_fn), \ + .poweroff = pm_sleep_ptr(suspend_fn), \ + .restore = pm_sleep_ptr(resume_fn), + +#define LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend_late = pm_sleep_ptr(suspend_fn), \ + .resume_early = pm_sleep_ptr(resume_fn), \ + .freeze_late = pm_sleep_ptr(suspend_fn), \ + .thaw_early = pm_sleep_ptr(resume_fn), \ + .poweroff_late = pm_sleep_ptr(suspend_fn), \ + .restore_early = pm_sleep_ptr(resume_fn), + +#define NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend_noirq = pm_sleep_ptr(suspend_fn), \ + .resume_noirq = pm_sleep_ptr(resume_fn), \ + .freeze_noirq = pm_sleep_ptr(suspend_fn), \ + .thaw_noirq = pm_sleep_ptr(resume_fn), \ + .poweroff_noirq = pm_sleep_ptr(suspend_fn), \ + .restore_noirq = pm_sleep_ptr(resume_fn), + +#define RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + .runtime_suspend = suspend_fn, \ + .runtime_resume = resume_fn, \ + .runtime_idle = idle_fn, + #ifdef CONFIG_PM_SLEEP #define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - .suspend = suspend_fn, \ - .resume = resume_fn, \ - .freeze = suspend_fn, \ - .thaw = resume_fn, \ - .poweroff = suspend_fn, \ - .restore = resume_fn, + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #else #define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif #ifdef CONFIG_PM_SLEEP #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - .suspend_late = suspend_fn, \ - .resume_early = resume_fn, \ - .freeze_late = suspend_fn, \ - .thaw_early = resume_fn, \ - .poweroff_late = suspend_fn, \ - .restore_early = resume_fn, + LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #else #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif #ifdef CONFIG_PM_SLEEP #define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - .suspend_noirq = suspend_fn, \ - .resume_noirq = resume_fn, \ - .freeze_noirq = suspend_fn, \ - .thaw_noirq = resume_fn, \ - .poweroff_noirq = suspend_fn, \ - .restore_noirq = resume_fn, + NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #else #define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif #ifdef CONFIG_PM #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ - .runtime_suspend = suspend_fn, \ - .runtime_resume = resume_fn, \ - .runtime_idle = idle_fn, + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) #else #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) #endif @@ -349,9 +361,9 @@ struct dev_pm_ops { * Use this if you want to use the same suspend and resume callbacks for suspend * to RAM and hibernation. */ -#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -const struct dev_pm_ops __maybe_unused name = { \ - SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +#define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +static const struct dev_pm_ops name = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } /* @@ -367,6 +379,19 @@ const struct dev_pm_ops __maybe_unused name = { \ * .resume_early(), to the same routines as .runtime_suspend() and * .runtime_resume(), respectively (and analogously for hibernation). */ +#define DEFINE_UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ +static const struct dev_pm_ops name = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ +} + +/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ +#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +const struct dev_pm_ops __maybe_unused name = { \ + SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +} + +/* Deprecated. Use DEFINE_UNIVERSAL_DEV_PM_OPS() instead. */ #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ const struct dev_pm_ops __maybe_unused name = { \ SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ @@ -374,6 +399,7 @@ const struct dev_pm_ops __maybe_unused name = { \ } #define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr)) +#define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr)) /* * PM_EVENT_ messages -- cgit v1.2.3 From 931da6a0de5d620425af4425344259e6ff46b654 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 7 Dec 2021 21:17:34 +0800 Subject: powercap: intel_rapl: support new layout of Psys PowerLimit Register on SPR On Sapphire Rapids, the layout of the Psys domain Power Limit Register is different from from what it was before. Enhance the code to support the new Psys PL register layout. Signed-off-by: Zhang Rui Reported-and-tested-by: Alkattan Dana [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/intel_rapl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 93780834fc8f..9f4b6f5b822f 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -58,6 +58,12 @@ enum rapl_primitives { THROTTLED_TIME, PRIORITY_LEVEL, + PSYS_POWER_LIMIT1, + PSYS_POWER_LIMIT2, + PSYS_PL1_ENABLE, + PSYS_PL2_ENABLE, + PSYS_TIME_WINDOW1, + PSYS_TIME_WINDOW2, /* below are not raw primitive data */ AVERAGE_POWER, NR_RAPL_PRIMITIVES, -- cgit v1.2.3 From c24efa6732788f0be22cdf5d2aedd5e3117e983f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Dec 2021 19:54:32 +0100 Subject: PM: runtime: Capture device status before disabling runtime PM In some cases (for example, during system-wide suspend and resume of devices) it is useful to know whether or not runtime PM has ever been enabled for a given device and, if so, what the runtime PM status of it had been right before runtime PM was disabled for it last time. For this reason, introduce a new struct dev_pm_info field called last_status that will be used for capturing the runtime PM status of the device when its power.disable_depth counter changes from 0 to 1. The new field will be set to RPM_INVALID to start with and whenever power.disable_depth changes from 1 to 0, so it will be valid only when runtime PM of the device is currently disabled, but it has been enabled at least once. Immediately use power.last_status in rpm_resume() to make it handle the case when PM runtime is disabled for the device, but its runtime PM status is RPM_ACTIVE more consistently. Namely, make it return 1 if power.last_status is also equal to RPM_ACTIVE in that case (the idea being that if the status was RPM_ACTIVE last time when power.disable_depth was changing from 0 to 1 and it is still RPM_ACTIVE, it can be assumed to reflect what happened to the device last time when it was using runtime PM) and -EACCES otherwise. Update the documentation to provide a description of last_status and change the description of pm_runtime_resume() in it to reflect the new behavior of rpm_active(). While at it, rearrange the code in pm_runtime_enable() to be more straightforward and replace the WARN() macro in it with a pr_warn() invocation which is less disruptive. Link: https://lore.kernel.org/linux-pm/20211026222626.39222-1-ulf.hansson@linaro.org/t/#u Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index fc9691cb01b4..e1e9402180b9 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -521,6 +521,7 @@ const struct dev_pm_ops __maybe_unused name = { \ */ enum rpm_status { + RPM_INVALID = -1, RPM_ACTIVE = 0, RPM_RESUMING, RPM_SUSPENDED, @@ -634,6 +635,7 @@ struct dev_pm_info { unsigned int links_count; enum rpm_request request; enum rpm_status runtime_status; + enum rpm_status last_status; int runtime_error; int autosuspend_delay; u64 last_busy; -- cgit v1.2.3 From d1579e61192e0e686faa4208500ef4c3b529b16c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Dec 2021 17:10:13 +0100 Subject: PM: runtime: Add safety net to supplier device release Because refcount_dec_not_one() returns true if the target refcount becomes saturated, it is generally unsafe to use its return value as a loop termination condition, but that is what happens when a device link's supplier device is released during runtime PM suspend operations and on device link removal. To address this, introduce pm_runtime_release_supplier() to be used in the above cases which will check the supplier device's runtime PM usage counter in addition to the refcount_dec_not_one() return value, so the loop can be terminated in case the rpm_active refcount value becomes invalid, and update the code in question to use it as appropriate. This change is not expected to have any visible functional impact. Reported-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Peter Zijlstra (Intel) --- include/linux/pm_runtime.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index eddd66d426ca..016de5776b6d 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,6 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); +extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle); extern int devm_pm_runtime_enable(struct device *dev); @@ -283,6 +284,8 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} +static inline void pm_runtime_release_supplier(struct device_link *link, + bool check_idle) {} #endif /* !CONFIG_PM */ -- cgit v1.2.3 From 227fee5fc99eeb74d43bf68832f6d59d30ac07d8 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 16 Dec 2021 13:42:25 +0530 Subject: bus: mhi: core: Add an API for auto queueing buffers for DL channel Add a new API "mhi_prepare_for_transfer_autoqueue" for using with client drivers like QRTR to request MHI core to autoqueue buffers for the DL channel along with starting both UL and DL channels. So far, the "auto_queue" flag specified by the controller drivers in channel definition served this purpose but this will be removed at some point in future. Cc: netdev@vger.kernel.org Cc: Jakub Kicinski Cc: David S. Miller Cc: Greg Kroah-Hartman Co-developed-by: Loic Poulain Acked-by: Jakub Kicinski Signed-off-by: Loic Poulain Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20211216081227.237749-9-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index a5cc4cdf9cc8..a5441ad33c74 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -730,15 +730,26 @@ void mhi_device_put(struct mhi_device *mhi_dev); /** * mhi_prepare_for_transfer - Setup UL and DL channels for data transfer. - * Allocate and initialize the channel context and - * also issue the START channel command to both - * channels. Channels can be started only if both - * host and device execution environments match and - * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * + * Allocate and initialize the channel context and also issue the START channel + * command to both channels. Channels can be started only if both host and + * device execution environments match and channels are in a DISABLED state. */ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); +/** + * mhi_prepare_for_transfer_autoqueue - Setup UL and DL channels with auto queue + * buffers for DL traffic + * @mhi_dev: Device associated with the channels + * + * Allocate and initialize the channel context and also issue the START channel + * command to both channels. Channels can be started only if both host and + * device execution environments match and channels are in a DISABLED state. + * The MHI core will automatically allocate and queue buffers for the DL traffic. + */ +int mhi_prepare_for_transfer_autoqueue(struct mhi_device *mhi_dev); + /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. * Issue the RESET channel command and let the -- cgit v1.2.3 From 49f39cb0ef198ae3c73765c9b9ee3034e4c9f076 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 1 Dec 2021 14:59:30 +0200 Subject: device property: Fix documentation for FWNODE_GRAPH_DEVICE_DISABLED FWNODE_GRAPH_DEVICE_DISABLED flag was meant for also returning endpoints connected to disabled devices, but it also may return endpoints that are not connected. Fix this in documentation. Also fwnode_graph_get_endpoint_by_id() was affeced by this. Also improve the language a little bit. Fixes: 0fcc2bdc8aff ("device property: Add fwnode_graph_get_endpoint_by_id()") Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 16f736c698a2..7a2df45ec3ae 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -414,7 +414,8 @@ static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode) * one. * @FWNODE_GRAPH_DEVICE_DISABLED: That the device to which the remote * endpoint of the given endpoint belongs to, - * may be disabled. + * may be disabled, or that the endpoint is not + * connected. */ #define FWNODE_GRAPH_ENDPOINT_NEXT BIT(0) #define FWNODE_GRAPH_DEVICE_DISABLED BIT(1) -- cgit v1.2.3 From c87b8fc569667610b4891cad1e4a663e5a94d8f8 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 1 Dec 2021 14:59:33 +0200 Subject: device property: Implement fwnode_graph_get_endpoint_count() Add fwnode_graph_get_endpoint_count() function to provide generic implementation of of_graph_get_endpoint_count(). The former by default only counts endpoints to available devices which is consistent with the rest of the fwnode graph API. By providing FWNODE_GRAPH_DEVICE_DISABLED flag, also unconnected endpoints and endpoints to disabled devices are counted. Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 7a2df45ec3ae..8c0104871252 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -423,6 +423,8 @@ static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode) struct fwnode_handle * fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, u32 port, u32 endpoint, unsigned long flags); +unsigned int fwnode_graph_get_endpoint_count(struct fwnode_handle *fwnode, + unsigned long flags); #define fwnode_graph_for_each_endpoint(fwnode, child) \ for (child = NULL; \ -- cgit v1.2.3 From c49eea6ffec626c059ace085fce1bf501b05dbc7 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 1 Dec 2021 15:01:15 +0200 Subject: device property: Drop fwnode_graph_get_remote_node() fwnode_graph_get_remote_node() is only used by the tegra-video driver. Convert it to use newer fwnode_graph_get_endpoint_by_id() and drop now-unused fwnode_graph_get_remote_node(). Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 8c0104871252..8355f99ebd47 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -397,9 +397,6 @@ struct fwnode_handle *fwnode_graph_get_remote_port( const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_endpoint( const struct fwnode_handle *fwnode); -struct fwnode_handle * -fwnode_graph_get_remote_node(const struct fwnode_handle *fwnode, u32 port, - u32 endpoint); static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode) { -- cgit v1.2.3 From e3c963c498871e0d4b2eceb32e2b989493838ccc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Dec 2021 17:36:20 +0100 Subject: ACPI: scan: Introduce acpi_fetch_acpi_dev() Introduce acpi_fetch_acpi_dev() as a more reasonable replacement for acpi_bus_get_device() and modify the code in scan.c to use it instead of the latter. No expected functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede --- include/acpi/acpi_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 480f9207a4c6..17f700c9a4f9 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -505,6 +505,7 @@ extern int unregister_acpi_notifier(struct notifier_block *); */ int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device); +struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta); int acpi_bus_get_status(struct acpi_device *device); -- cgit v1.2.3 From bd0b536dc2e1e9828a85b1e3470ee7bafc3b36f6 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Nov 2021 16:15:59 -0800 Subject: virtchnl: Add support for new VLAN capabilities Currently VIRTCHNL only allows for VLAN filtering and offloads to happen on a single 802.1Q VLAN. Add support to filter and offload on inner, outer, and/or inner + outer VLANs. This is done by introducing the new capability VIRTCHNL_VF_OFFLOAD_VLAN_V2. The flow to negotiate this new capability is shown below. 1. VF - sets the VIRTCHNL_VF_OFFLOAD_VLAN_V2 bit in the virtchnl_vf_resource.vf_caps_flags during the VIRTCHNL_OP_GET_VF_RESOURCES request message. The VF should also set the VIRTCHNL_VF_OFFLOAD_VLAN bit in case the PF driver doesn't support the new capability. 2. PF - sets the VLAN capability bit it supports in the VIRTCHNL_OP_GET_VF_RESOURCES response message. This will either be VIRTCHNL_VF_OFFLOAD_VLAN_V2, VIRTCHNL_VF_OFFLOAD_VLAN, or none. 3. VF - If the VIRTCHNL_VF_OFFLOAD_VLAN_V2 capability was ACK'd by the PF, then the VF needs to request the VLAN capabilities of the PF/Device by issuing a VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS request. If the VIRTCHNL_VF_OFFLOAD_VLAN capability was ACK'd then the VF knows only single 802.1Q VLAN filtering/offloads are supported. If no VLAN capability is ACK'd then the PF/Device doesn't support hardware VLAN filtering/offloads for this VF. 4. PF - Populates the virtchnl_vlan_caps structure based on what it allows/supports for that VF and sends that response via VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS. After VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS is successfully negotiated the VF driver needs to interpret the capabilities supported by the underlying PF/Device. The VF will be allowed to filter/offload the inner 802.1Q, outer (various ethertype), inner 802.1Q + outer (various ethertypes), or none based on which fields are set. The VF will also need to interpret where the VLAN tag should be inserted and/or stripped based on the negotiated capabilities. Signed-off-by: Brett Creeley Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 377 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 377 insertions(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index b30a1bc74fc7..2ce27e8e4f19 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -141,6 +141,13 @@ enum virtchnl_ops { VIRTCHNL_OP_DEL_RSS_CFG = 46, VIRTCHNL_OP_ADD_FDIR_FILTER = 47, VIRTCHNL_OP_DEL_FDIR_FILTER = 48, + VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS = 51, + VIRTCHNL_OP_ADD_VLAN_V2 = 52, + VIRTCHNL_OP_DEL_VLAN_V2 = 53, + VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 = 54, + VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, + VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, + VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, VIRTCHNL_OP_MAX, }; @@ -246,6 +253,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6) /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) +#define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 BIT(18) @@ -475,6 +483,351 @@ struct virtchnl_vlan_filter_list { VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_vlan_filter_list); +/* This enum is used for all of the VIRTCHNL_VF_OFFLOAD_VLAN_V2_CAPS related + * structures and opcodes. + * + * VIRTCHNL_VLAN_UNSUPPORTED - This field is not supported and if a VF driver + * populates it the PF should return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED. + * + * VIRTCHNL_VLAN_ETHERTYPE_8100 - This field supports 0x8100 ethertype. + * VIRTCHNL_VLAN_ETHERTYPE_88A8 - This field supports 0x88A8 ethertype. + * VIRTCHNL_VLAN_ETHERTYPE_9100 - This field supports 0x9100 ethertype. + * + * VIRTCHNL_VLAN_ETHERTYPE_AND - Used when multiple ethertypes can be supported + * by the PF concurrently. For example, if the PF can support + * VIRTCHNL_VLAN_ETHERTYPE_8100 AND VIRTCHNL_VLAN_ETHERTYPE_88A8 filters it + * would OR the following bits: + * + * VIRTHCNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_AND; + * + * The VF would interpret this as VLAN filtering can be supported on both 0x8100 + * and 0x88A8 VLAN ethertypes. + * + * VIRTCHNL_ETHERTYPE_XOR - Used when only a single ethertype can be supported + * by the PF concurrently. For example if the PF can support + * VIRTCHNL_VLAN_ETHERTYPE_8100 XOR VIRTCHNL_VLAN_ETHERTYPE_88A8 stripping + * offload it would OR the following bits: + * + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_XOR; + * + * The VF would interpret this as VLAN stripping can be supported on either + * 0x8100 or 0x88a8 VLAN ethertypes. So when requesting VLAN stripping via + * VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 the specified ethertype will override + * the previously set value. + * + * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1 - Used to tell the VF to insert and/or + * strip the VLAN tag using the L2TAG1 field of the Tx/Rx descriptors. + * + * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2 - Used to tell the VF to insert hardware + * offloaded VLAN tags using the L2TAG2 field of the Tx descriptor. + * + * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2 - Used to tell the VF to strip hardware + * offloaded VLAN tags using the L2TAG2_2 field of the Rx descriptor. + * + * VIRTCHNL_VLAN_PRIO - This field supports VLAN priority bits. This is used for + * VLAN filtering if the underlying PF supports it. + * + * VIRTCHNL_VLAN_TOGGLE_ALLOWED - This field is used to say whether a + * certain VLAN capability can be toggled. For example if the underlying PF/CP + * allows the VF to toggle VLAN filtering, stripping, and/or insertion it should + * set this bit along with the supported ethertypes. + */ +enum virtchnl_vlan_support { + VIRTCHNL_VLAN_UNSUPPORTED = 0, + VIRTCHNL_VLAN_ETHERTYPE_8100 = BIT(0), + VIRTCHNL_VLAN_ETHERTYPE_88A8 = BIT(1), + VIRTCHNL_VLAN_ETHERTYPE_9100 = BIT(2), + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1 = BIT(8), + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2 = BIT(9), + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 = BIT(10), + VIRTCHNL_VLAN_PRIO = BIT(24), + VIRTCHNL_VLAN_FILTER_MASK = BIT(28), + VIRTCHNL_VLAN_ETHERTYPE_AND = BIT(29), + VIRTCHNL_VLAN_ETHERTYPE_XOR = BIT(30), + VIRTCHNL_VLAN_TOGGLE = BIT(31), +}; + +/* This structure is used as part of the VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS + * for filtering, insertion, and stripping capabilities. + * + * If only outer capabilities are supported (for filtering, insertion, and/or + * stripping) then this refers to the outer most or single VLAN from the VF's + * perspective. + * + * If only inner capabilities are supported (for filtering, insertion, and/or + * stripping) then this refers to the outer most or single VLAN from the VF's + * perspective. Functionally this is the same as if only outer capabilities are + * supported. The VF driver is just forced to use the inner fields when + * adding/deleting filters and enabling/disabling offloads (if supported). + * + * If both outer and inner capabilities are supported (for filtering, insertion, + * and/or stripping) then outer refers to the outer most or single VLAN and + * inner refers to the second VLAN, if it exists, in the packet. + * + * There is no support for tunneled VLAN offloads, so outer or inner are never + * referring to a tunneled packet from the VF's perspective. + */ +struct virtchnl_vlan_supported_caps { + u32 outer; + u32 inner; +}; + +/* The PF populates these fields based on the supported VLAN filtering. If a + * field is VIRTCHNL_VLAN_UNSUPPORTED then it's not supported and the PF will + * reject any VIRTCHNL_OP_ADD_VLAN_V2 or VIRTCHNL_OP_DEL_VLAN_V2 messages using + * the unsupported fields. + * + * Also, a VF is only allowed to toggle its VLAN filtering setting if the + * VIRTCHNL_VLAN_TOGGLE bit is set. + * + * The ethertype(s) specified in the ethertype_init field are the ethertypes + * enabled for VLAN filtering. VLAN filtering in this case refers to the outer + * most VLAN from the VF's perspective. If both inner and outer filtering are + * allowed then ethertype_init only refers to the outer most VLAN as only + * VLAN ethertype supported for inner VLAN filtering is + * VIRTCHNL_VLAN_ETHERTYPE_8100. By default, inner VLAN filtering is disabled + * when both inner and outer filtering are allowed. + * + * The max_filters field tells the VF how many VLAN filters it's allowed to have + * at any one time. If it exceeds this amount and tries to add another filter, + * then the request will be rejected by the PF. To prevent failures, the VF + * should keep track of how many VLAN filters it has added and not attempt to + * add more than max_filters. + */ +struct virtchnl_vlan_filtering_caps { + struct virtchnl_vlan_supported_caps filtering_support; + u32 ethertype_init; + u16 max_filters; + u8 pad[2]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vlan_filtering_caps); + +/* This enum is used for the virtchnl_vlan_offload_caps structure to specify + * if the PF supports a different ethertype for stripping and insertion. + * + * VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION - The ethertype(s) specified + * for stripping affect the ethertype(s) specified for insertion and visa versa + * as well. If the VF tries to configure VLAN stripping via + * VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 with VIRTCHNL_VLAN_ETHERTYPE_8100 then + * that will be the ethertype for both stripping and insertion. + * + * VIRTCHNL_ETHERTYPE_MATCH_NOT_REQUIRED - The ethertype(s) specified for + * stripping do not affect the ethertype(s) specified for insertion and visa + * versa. + */ +enum virtchnl_vlan_ethertype_match { + VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION = 0, + VIRTCHNL_ETHERTYPE_MATCH_NOT_REQUIRED = 1, +}; + +/* The PF populates these fields based on the supported VLAN offloads. If a + * field is VIRTCHNL_VLAN_UNSUPPORTED then it's not supported and the PF will + * reject any VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 or + * VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 messages using the unsupported fields. + * + * Also, a VF is only allowed to toggle its VLAN offload setting if the + * VIRTCHNL_VLAN_TOGGLE_ALLOWED bit is set. + * + * The VF driver needs to be aware of how the tags are stripped by hardware and + * inserted by the VF driver based on the level of offload support. The PF will + * populate these fields based on where the VLAN tags are expected to be + * offloaded via the VIRTHCNL_VLAN_TAG_LOCATION_* bits. The VF will need to + * interpret these fields. See the definition of the + * VIRTCHNL_VLAN_TAG_LOCATION_* bits above the virtchnl_vlan_support + * enumeration. + */ +struct virtchnl_vlan_offload_caps { + struct virtchnl_vlan_supported_caps stripping_support; + struct virtchnl_vlan_supported_caps insertion_support; + u32 ethertype_init; + u8 ethertype_match; + u8 pad[3]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_vlan_offload_caps); + +/* VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS + * VF sends this message to determine its VLAN capabilities. + * + * PF will mark which capabilities it supports based on hardware support and + * current configuration. For example, if a port VLAN is configured the PF will + * not allow outer VLAN filtering, stripping, or insertion to be configured so + * it will block these features from the VF. + * + * The VF will need to cross reference its capabilities with the PFs + * capabilities in the response message from the PF to determine the VLAN + * support. + */ +struct virtchnl_vlan_caps { + struct virtchnl_vlan_filtering_caps filtering; + struct virtchnl_vlan_offload_caps offloads; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_vlan_caps); + +struct virtchnl_vlan { + u16 tci; /* tci[15:13] = PCP and tci[11:0] = VID */ + u16 tci_mask; /* only valid if VIRTCHNL_VLAN_FILTER_MASK set in + * filtering caps + */ + u16 tpid; /* 0x8100, 0x88a8, etc. and only type(s) set in + * filtering caps. Note that tpid here does not refer to + * VIRTCHNL_VLAN_ETHERTYPE_*, but it refers to the + * actual 2-byte VLAN TPID + */ + u8 pad[2]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_vlan); + +struct virtchnl_vlan_filter { + struct virtchnl_vlan inner; + struct virtchnl_vlan outer; + u8 pad[16]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(32, virtchnl_vlan_filter); + +/* VIRTCHNL_OP_ADD_VLAN_V2 + * VIRTCHNL_OP_DEL_VLAN_V2 + * + * VF sends these messages to add/del one or more VLAN tag filters for Rx + * traffic. + * + * The PF attempts to add the filters and returns status. + * + * The VF should only ever attempt to add/del virtchnl_vlan_filter(s) using the + * supported fields negotiated via VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS. + */ +struct virtchnl_vlan_filter_list_v2 { + u16 vport_id; + u16 num_elements; + u8 pad[4]; + struct virtchnl_vlan_filter filters[1]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_vlan_filter_list_v2); + +/* VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 + * VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 + * VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 + * VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 + * + * VF sends this message to enable or disable VLAN stripping or insertion. It + * also needs to specify an ethertype. The VF knows which VLAN ethertypes are + * allowed and whether or not it's allowed to enable/disable the specific + * offload via the VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS message. The VF needs to + * parse the virtchnl_vlan_caps.offloads fields to determine which offload + * messages are allowed. + * + * For example, if the PF populates the virtchnl_vlan_caps.offloads in the + * following manner the VF will be allowed to enable and/or disable 0x8100 inner + * VLAN insertion and/or stripping via the opcodes listed above. Inner in this + * case means the outer most or single VLAN from the VF's perspective. This is + * because no outer offloads are supported. See the comments above the + * virtchnl_vlan_supported_caps structure for more details. + * + * virtchnl_vlan_caps.offloads.stripping_support.inner = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100; + * + * virtchnl_vlan_caps.offloads.insertion_support.inner = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100; + * + * In order to enable inner (again note that in this case inner is the outer + * most or single VLAN from the VF's perspective) VLAN stripping for 0x8100 + * VLANs, the VF would populate the virtchnl_vlan_setting structure in the + * following manner and send the VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 message. + * + * virtchnl_vlan_setting.inner_ethertype_setting = + * VIRTCHNL_VLAN_ETHERTYPE_8100; + * + * virtchnl_vlan_setting.vport_id = vport_id or vsi_id assigned to the VF on + * initialization. + * + * The reason that VLAN TPID(s) are not being used for the + * outer_ethertype_setting and inner_ethertype_setting fields is because it's + * possible a device could support VLAN insertion and/or stripping offload on + * multiple ethertypes concurrently, so this method allows a VF to request + * multiple ethertypes in one message using the virtchnl_vlan_support + * enumeration. + * + * For example, if the PF populates the virtchnl_vlan_caps.offloads in the + * following manner the VF will be allowed to enable 0x8100 and 0x88a8 outer + * VLAN insertion and stripping simultaneously. The + * virtchnl_vlan_caps.offloads.ethertype_match field will also have to be + * populated based on what the PF can support. + * + * virtchnl_vlan_caps.offloads.stripping_support.outer = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_AND; + * + * virtchnl_vlan_caps.offloads.insertion_support.outer = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_AND; + * + * In order to enable outer VLAN stripping for 0x8100 and 0x88a8 VLANs, the VF + * would populate the virthcnl_vlan_offload_structure in the following manner + * and send the VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 message. + * + * virtchnl_vlan_setting.outer_ethertype_setting = + * VIRTHCNL_VLAN_ETHERTYPE_8100 | + * VIRTHCNL_VLAN_ETHERTYPE_88A8; + * + * virtchnl_vlan_setting.vport_id = vport_id or vsi_id assigned to the VF on + * initialization. + * + * There is also the case where a PF and the underlying hardware can support + * VLAN offloads on multiple ethertypes, but not concurrently. For example, if + * the PF populates the virtchnl_vlan_caps.offloads in the following manner the + * VF will be allowed to enable and/or disable 0x8100 XOR 0x88a8 outer VLAN + * offloads. The ethertypes must match for stripping and insertion. + * + * virtchnl_vlan_caps.offloads.stripping_support.outer = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_XOR; + * + * virtchnl_vlan_caps.offloads.insertion_support.outer = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_XOR; + * + * virtchnl_vlan_caps.offloads.ethertype_match = + * VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION; + * + * In order to enable outer VLAN stripping for 0x88a8 VLANs, the VF would + * populate the virtchnl_vlan_setting structure in the following manner and send + * the VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2. Also, this will change the + * ethertype for VLAN insertion if it's enabled. So, for completeness, a + * VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 with the same ethertype should be sent. + * + * virtchnl_vlan_setting.outer_ethertype_setting = VIRTHCNL_VLAN_ETHERTYPE_88A8; + * + * virtchnl_vlan_setting.vport_id = vport_id or vsi_id assigned to the VF on + * initialization. + */ +struct virtchnl_vlan_setting { + u32 outer_ethertype_setting; + u32 inner_ethertype_setting; + u16 vport_id; + u8 pad[6]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vlan_setting); + /* VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE * VF sends VSI id and flags. * PF returns status code in retval. @@ -1156,6 +1509,30 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DEL_FDIR_FILTER: valid_len = sizeof(struct virtchnl_fdir_del); break; + case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS: + break; + case VIRTCHNL_OP_ADD_VLAN_V2: + case VIRTCHNL_OP_DEL_VLAN_V2: + valid_len = sizeof(struct virtchnl_vlan_filter_list_v2); + if (msglen >= valid_len) { + struct virtchnl_vlan_filter_list_v2 *vfl = + (struct virtchnl_vlan_filter_list_v2 *)msg; + + valid_len += (vfl->num_elements - 1) * + sizeof(struct virtchnl_vlan_filter); + + if (vfl->num_elements == 0) { + err_msg_format = true; + break; + } + } + break; + case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2: + case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2: + case VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2: + case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: + valid_len = sizeof(struct virtchnl_vlan_setting); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit v1.2.3 From 877fee2a0c65a3b0b6ac0e90d7d7718b5a0341d3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 17 Dec 2021 15:15:15 +0100 Subject: PCI: Convert pci_dev_present() stub to static inline Change the pci_dev_present() stub which is used when CONFIG_PCI is not set from a #define to a static inline stub. Thix should fix clang -Werror builds failing due to errors like this: drivers/platform/x86/thinkpad_acpi.c:4475:35: error: unused variable 'fwbug_cards_ids' [-Werror,-Wunused-const-variable] Where fwbug_cards_ids is an array of pci_device_id passed to pci_dev_present() during a quirk check. Link: https://lore.kernel.org/r/20211217141515.379586-1-hdegoede@redhat.com Reported-by: kernel test robot Signed-off-by: Hans de Goede Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Cc: platform-driver-x86@vger.kernel.org --- include/linux/pci.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 18a75c8e615c..7d825637d7ca 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1775,7 +1775,10 @@ static inline struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from) { return NULL; } -#define pci_dev_present(ids) (0) + +static inline int pci_dev_present(const struct pci_device_id *ids) +{ return 0; } + #define no_pci_devices() (1) #define pci_dev_put(dev) do { } while (0) -- cgit v1.2.3 From ec624fe740b416fb68d536b37fb8eef46f90b5c2 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:33 +0200 Subject: net/sched: Extend qdisc control block with tc control block BPF layer extends the qdisc control block via struct bpf_skb_data_end and because of that there is no more room to add variables to the qdisc layer control block without going over the skb->cb size. Extend the qdisc control block with a tc control block, and move all tc related variables to there as a pre-step for extending the tc control block with additional members. Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 15 +++++++++++++++ include/net/sch_generic.h | 2 -- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index bf79f3a890af..05f18e81f3e8 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) skb->tstamp = ktime_set(0, 0); } +struct tc_skb_cb { + struct qdisc_skb_cb qdisc_cb; + + u16 mru; + bool post_ct; +}; + +static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) +{ + struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); + return cb; +} + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 22179b2fda72..c70e6d2b2fdd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -447,8 +447,6 @@ struct qdisc_skb_cb { }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; - u16 mru; - bool post_ct; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); -- cgit v1.2.3 From 3849595866166b23bf6a0cb9ff87e06423167f67 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:34 +0200 Subject: net/sched: flow_dissector: Fix matching on zone id for invalid conns If ct rejects a flow, it removes the conntrack info from the skb. act_ct sets the post_ct variable so the dissector will see this case as an +tracked +invalid state, but the zone id is lost with the conntrack info. To restore the zone id on such cases, set the last executed zone, via the tc control block, when passing ct, and read it back in the dissector if there is no ct info on the skb (invalid connection). Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support") Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- include/net/pkt_sched.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c8cb7e697d47..2ecf8cfd2223 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1380,7 +1380,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, size_t mapsize, - bool post_ct); + bool post_ct, u16 zone); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 05f18e81f3e8..9e71691c491b 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -198,6 +198,7 @@ struct tc_skb_cb { u16 mru; bool post_ct; + u16 zone; /* Only valid if post_ct = true */ }; static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) -- cgit v1.2.3 From 635d448a1cce4b4ebee52b351052c70434fa90ea Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:35 +0200 Subject: net: openvswitch: Fix matching zone id for invalid conns arriving from tc Zone id is not restored if we passed ct and ct rejected the connection, as there is no ct info on the skb. Save the zone from tc skb cb to tc skb extension and pass it on to ovs, use that info to restore the zone id for invalid connections. Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct") Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2ecf8cfd2223..4507d77d6941 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -286,6 +286,7 @@ struct nf_bridge_info { struct tc_skb_ext { __u32 chain; __u16 mru; + __u16 zone; bool post_ct; }; #endif -- cgit v1.2.3 From 6e478521df535b9d5ef5eb84d4352f235bbbef99 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 30 Jul 2021 09:56:05 -0400 Subject: iomap,xfs: Convert ->discard_page to ->discard_folio XFS has the only implementation of ->discard_page today, so convert it to use folios in the same patch as converting the API. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 29491fb9c5ba..5ef5088dbbd8 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -285,7 +285,7 @@ struct iomap_writeback_ops { * Optional, allows the file system to discard state on a page where * we failed to submit any I/O. */ - void (*discard_page)(struct page *page, loff_t fileoff); + void (*discard_folio)(struct folio *folio, loff_t pos); }; struct iomap_writepage_ctx { -- cgit v1.2.3 From e17f7a0bc4daa44a4809f5f2f947aa2aa74d1369 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Dec 2021 09:45:05 +0100 Subject: uio: remove copy_from_iter_flushcache() and copy_mc_to_iter() These two wrappers are never used. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211215084508.435401-2-hch@lst.de Signed-off-by: Dan Williams --- include/linux/uio.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 6350354f97e9..494d552c1d66 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -196,7 +196,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /* * Note, users like pmem that depend on the stricter semantics of - * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for + * _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the * destination is flushed from the cache on return. */ @@ -211,24 +211,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #define _copy_mc_to_iter _copy_to_iter #endif -static __always_inline __must_check -size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) -{ - if (unlikely(!check_copy_size(addr, bytes, false))) - return 0; - else - return _copy_from_iter_flushcache(addr, bytes, i); -} - -static __always_inline __must_check -size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i) -{ - if (unlikely(!check_copy_size(addr, bytes, true))) - return 0; - else - return _copy_mc_to_iter(addr, bytes, i); -} - size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); -- cgit v1.2.3 From fd1d00ec92002d8fe28ca981a72395eaa7ae3d11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Dec 2021 09:45:06 +0100 Subject: dax: simplify dax_synchronous and set_dax_synchronous Remove the pointless wrappers. Signed-off-by: Christoph Hellwig Reviewed-by: Pankaj Gupta Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/20211215084508.435401-3-hch@lst.de Signed-off-by: Dan Williams --- include/linux/dax.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 87ae4c9b1d65..3bd1fdb5d5f4 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -48,16 +48,8 @@ void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); -bool __dax_synchronous(struct dax_device *dax_dev); -static inline bool dax_synchronous(struct dax_device *dax_dev) -{ - return __dax_synchronous(dax_dev); -} -void __set_dax_synchronous(struct dax_device *dax_dev); -static inline void set_dax_synchronous(struct dax_device *dax_dev) -{ - __set_dax_synchronous(dax_dev); -} +bool dax_synchronous(struct dax_device *dax_dev); +void set_dax_synchronous(struct dax_device *dax_dev); /* * Check if given mapping is supported by the file / underlying device. */ -- cgit v1.2.3 From 30c6828a17a572aeb9e3a3bacce05fdcf1106541 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Dec 2021 09:45:07 +0100 Subject: dax: remove the DAXDEV_F_SYNC flag Remove the DAXDEV_F_SYNC flag and thus the flags argument to alloc_dax and just let the drivers call set_dax_synchronous directly. Signed-off-by: Christoph Hellwig Reviewed-by: Pankaj Gupta Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/20211215084508.435401-4-hch@lst.de Signed-off-by: Dan Williams --- include/linux/dax.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 3bd1fdb5d5f4..c04f46478e3b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -6,9 +6,6 @@ #include #include -/* Flag for synchronous flush */ -#define DAXDEV_F_SYNC (1UL << 0) - typedef unsigned long dax_entry_t; struct dax_device; @@ -42,8 +39,7 @@ struct dax_operations { }; #if IS_ENABLED(CONFIG_DAX) -struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, - unsigned long flags); +struct dax_device *alloc_dax(void *private, const struct dax_operations *ops); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); @@ -64,7 +60,7 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, } #else static inline struct dax_device *alloc_dax(void *private, - const struct dax_operations *ops, unsigned long flags) + const struct dax_operations *ops) { /* * Callers should check IS_ENABLED(CONFIG_DAX) to know if this -- cgit v1.2.3 From 7ac5360cd4d02cc7e0eaf10867f599e041822f12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Dec 2021 09:45:08 +0100 Subject: dax: remove the copy_from_iter and copy_to_iter methods These methods indirect the actual DAX read/write path. In the end pmem uses magic flush and mc safe variants and fuse and dcssblk use plain ones while device mapper picks redirects to the underlying device. Add set_dax_nocache() and set_dax_nomc() APIs to control which copy routines are used to remove indirect call from the read/write fast path as well as a lot of boilerplate code. Signed-off-by: Christoph Hellwig Reviewed-by: Vivek Goyal [virtiofs] Link: https://lore.kernel.org/r/20211215084508.435401-5-hch@lst.de Signed-off-by: Dan Williams --- include/linux/dax.h | 9 +++------ include/linux/device-mapper.h | 4 ---- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index c04f46478e3b..9fc5f99a0ae2 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -28,12 +28,6 @@ struct dax_operations { */ bool (*dax_supported)(struct dax_device *, struct block_device *, int, sector_t, sector_t); - /* copy_from_iter: required operation for fs-dax direct-i/o */ - size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, - struct iov_iter *); - /* copy_to_iter: required operation for fs-dax direct-i/o */ - size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, - struct iov_iter *); /* zero_page_range: required operation. Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); }; @@ -95,6 +89,9 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, } #endif +void set_dax_nocache(struct dax_device *dax_dev); +void set_dax_nomc(struct dax_device *dax_dev); + struct writeback_control; #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a7df155ea49b..b26fecf6c8e8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -147,8 +147,6 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); -typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i); typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages); @@ -200,8 +198,6 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; - dm_dax_copy_iter_fn dax_copy_from_iter; - dm_dax_copy_iter_fn dax_copy_to_iter; dm_dax_zero_page_range_fn dax_zero_page_range; /* For internal device-mapper use. */ -- cgit v1.2.3 From d639b9d13a39cf15639cbe6e8b2c43eb60148a73 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 16 Dec 2021 16:31:44 -0800 Subject: bpf: Introduce composable reg, ret and arg types. There are some common properties shared between bpf reg, ret and arg values. For instance, a value may be a NULL pointer, or a pointer to a read-only memory. Previously, to express these properties, enumeration was used. For example, in order to test whether a reg value can be NULL, reg_type_may_be_null() simply enumerates all types that are possibly NULL. The problem of this approach is that it's not scalable and causes a lot of duplication. These properties can be combined, for example, a type could be either MAYBE_NULL or RDONLY, or both. This patch series rewrites the layout of reg_type, arg_type and ret_type, so that common properties can be extracted and represented as composable flag. For example, one can write ARG_PTR_TO_MEM | PTR_MAYBE_NULL which is equivalent to the previous ARG_PTR_TO_MEM_OR_NULL The type ARG_PTR_TO_MEM are called "base type" in this patch. Base types can be extended with flags. A flag occupies the higher bits while base types sits in the lower bits. This patch in particular sets up a set of macro for this purpose. The following patches will rewrite arg_types, ret_types and reg_types respectively. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211217003152.48334-2-haoluo@google.com --- include/linux/bpf.h | 42 ++++++++++++++++++++++++++++++++++++++++++ include/linux/bpf_verifier.h | 13 +++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 965fffaf0308..41bb3687cc85 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -297,6 +297,29 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, extern const struct bpf_map_ops bpf_map_offload_ops; +/* bpf_type_flag contains a set of flags that are applicable to the values of + * arg_type, ret_type and reg_type. For example, a pointer value may be null, + * or a memory is read-only. We classify types into two categories: base types + * and extended types. Extended types are base types combined with a type flag. + * + * Currently there are no more than 32 base types in arg_type, ret_type and + * reg_types. + */ +#define BPF_BASE_TYPE_BITS 8 + +enum bpf_type_flag { + /* PTR may be NULL. */ + PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = PTR_MAYBE_NULL, +}; + +/* Max number of base types. */ +#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) + +/* Max number of all types. */ +#define BPF_TYPE_LIMIT (__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1)) + /* function argument constraints */ enum bpf_arg_type { ARG_DONTCARE = 0, /* unused argument in helper function */ @@ -343,7 +366,13 @@ enum bpf_arg_type { ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ __BPF_ARG_TYPE_MAX, + + /* This must be the last entry. Its purpose is to ensure the enum is + * wide enough to hold the higher bits reserved for bpf_type_flag. + */ + __BPF_ARG_TYPE_LIMIT = BPF_TYPE_LIMIT, }; +static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); /* type of values returned from helper functions */ enum bpf_return_type { @@ -359,7 +388,14 @@ enum bpf_return_type { RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ + __BPF_RET_TYPE_MAX, + + /* This must be the last entry. Its purpose is to ensure the enum is + * wide enough to hold the higher bits reserved for bpf_type_flag. + */ + __BPF_RET_TYPE_LIMIT = BPF_TYPE_LIMIT, }; +static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL @@ -461,7 +497,13 @@ enum bpf_reg_type { PTR_TO_FUNC, /* reg points to a bpf program function */ PTR_TO_MAP_KEY, /* reg points to a map element key */ __BPF_REG_TYPE_MAX, + + /* This must be the last entry. Its purpose is to ensure the enum is + * wide enough to hold the higher bits reserved for bpf_type_flag. + */ + __BPF_REG_TYPE_LIMIT = BPF_TYPE_LIMIT, }; +static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); /* The information passed from prog-specific *_is_valid_access * back to the verifier. diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index ee931398f311..34e4ceaca3c7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -546,5 +546,18 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, struct bpf_attach_target_info *tgt_info); void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab); +#define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0) + +/* extract base type from bpf_{arg, return, reg}_type. */ +static inline u32 base_type(u32 type) +{ + return type & BPF_BASE_TYPE_MASK; +} + +/* extract flags from an extended type. See bpf_type_flag in bpf.h. */ +static inline u32 type_flag(u32 type) +{ + return type & ~BPF_BASE_TYPE_MASK; +} #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From 48946bd6a5d695c50b34546864b79c1f910a33c1 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 16 Dec 2021 16:31:45 -0800 Subject: bpf: Replace ARG_XXX_OR_NULL with ARG_XXX | PTR_MAYBE_NULL We have introduced a new type to make bpf_arg composable, by reserving high bits of bpf_arg to represent flags of a type. One of the flags is PTR_MAYBE_NULL which indicates a pointer may be NULL. When applying this flag to an arg_type, it means the arg can take NULL pointer. This patch switches the qualified arg_types to use this flag. The arg_types changed in this patch include: 1. ARG_PTR_TO_MAP_VALUE_OR_NULL 2. ARG_PTR_TO_MEM_OR_NULL 3. ARG_PTR_TO_CTX_OR_NULL 4. ARG_PTR_TO_SOCKET_OR_NULL 5. ARG_PTR_TO_ALLOC_MEM_OR_NULL 6. ARG_PTR_TO_STACK_OR_NULL This patch does not eliminate the use of these arg_types, instead it makes them an alias to the 'ARG_XXX | PTR_MAYBE_NULL'. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211217003152.48334-3-haoluo@google.com --- include/linux/bpf.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 41bb3687cc85..765bd7cc4272 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -331,13 +331,11 @@ enum bpf_arg_type { ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ - ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ - ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, * helper function must fill all bytes or clear * them in error case. @@ -347,26 +345,31 @@ enum bpf_arg_type { ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ - ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */ ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ ARG_PTR_TO_INT, /* pointer to int */ ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ - ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ - ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ - ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ + ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ __BPF_ARG_TYPE_MAX, + /* Extended arg_types. */ + ARG_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE, + ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, + ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, + ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, + ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, + ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, + /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. */ -- cgit v1.2.3 From 3c4807322660d4290ac9062c034aed6b87243861 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 16 Dec 2021 16:31:46 -0800 Subject: bpf: Replace RET_XXX_OR_NULL with RET_XXX | PTR_MAYBE_NULL We have introduced a new type to make bpf_ret composable, by reserving high bits to represent flags. One of the flag is PTR_MAYBE_NULL, which indicates a pointer may be NULL. When applying this flag to ret_types, it means the returned value could be a NULL pointer. This patch switches the qualified arg_types to use this flag. The ret_types changed in this patch include: 1. RET_PTR_TO_MAP_VALUE_OR_NULL 2. RET_PTR_TO_SOCKET_OR_NULL 3. RET_PTR_TO_TCP_SOCK_OR_NULL 4. RET_PTR_TO_SOCK_COMMON_OR_NULL 5. RET_PTR_TO_ALLOC_MEM_OR_NULL 6. RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL 7. RET_PTR_TO_BTF_ID_OR_NULL This patch doesn't eliminate the use of these names, instead it makes them aliases to 'RET_PTR_TO_XXX | PTR_MAYBE_NULL'. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211217003152.48334-4-haoluo@google.com --- include/linux/bpf.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 765bd7cc4272..975a1d5951bd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -382,17 +382,22 @@ enum bpf_return_type { RET_INTEGER, /* function returns integer */ RET_VOID, /* function doesn't return anything */ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ - RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ - RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ - RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ - RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ - RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ - RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ - RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ + RET_PTR_TO_SOCKET, /* returns a pointer to a socket */ + RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */ + RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */ + RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ __BPF_RET_TYPE_MAX, + /* Extended ret_types. */ + RET_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE, + RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, + RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, + RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, + RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, + /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. */ -- cgit v1.2.3 From c25b2ae136039ffa820c26138ed4a5e5f3ab3841 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 16 Dec 2021 16:31:47 -0800 Subject: bpf: Replace PTR_TO_XXX_OR_NULL with PTR_TO_XXX | PTR_MAYBE_NULL We have introduced a new type to make bpf_reg composable, by allocating bits in the type to represent flags. One of the flags is PTR_MAYBE_NULL which indicates a pointer may be NULL. This patch switches the qualified reg_types to use this flag. The reg_types changed in this patch include: 1. PTR_TO_MAP_VALUE_OR_NULL 2. PTR_TO_SOCKET_OR_NULL 3. PTR_TO_SOCK_COMMON_OR_NULL 4. PTR_TO_TCP_SOCK_OR_NULL 5. PTR_TO_BTF_ID_OR_NULL 6. PTR_TO_MEM_OR_NULL 7. PTR_TO_RDONLY_BUF_OR_NULL 8. PTR_TO_RDWR_BUF_OR_NULL Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211217003152.48334-5-haoluo@google.com --- include/linux/bpf.h | 18 +++++++++--------- include/linux/bpf_verifier.h | 4 ++++ 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 975a1d5951bd..c3de62267b84 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -465,18 +465,15 @@ enum bpf_reg_type { PTR_TO_CTX, /* reg points to bpf_context */ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ PTR_TO_MAP_VALUE, /* reg points to map element value */ - PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ PTR_TO_STACK, /* reg == frame_pointer + offset */ PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ PTR_TO_SOCKET, /* reg points to struct bpf_sock */ - PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ PTR_TO_SOCK_COMMON, /* reg points to sock_common */ - PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ - PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ /* PTR_TO_BTF_ID points to a kernel struct that does not need @@ -494,18 +491,21 @@ enum bpf_reg_type { * been checked for null. Used primarily to inform the verifier * an explicit null check is required for this struct. */ - PTR_TO_BTF_ID_OR_NULL, PTR_TO_MEM, /* reg points to valid memory region */ - PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ - PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ - PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ PTR_TO_FUNC, /* reg points to a bpf program function */ - PTR_TO_MAP_KEY, /* reg points to a map element key */ __BPF_REG_TYPE_MAX, + /* Extended reg_types. */ + PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MAP_VALUE, + PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, + PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, + PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, + PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, + PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MEM, + /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 34e4ceaca3c7..143401d4c9d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -18,6 +18,8 @@ * that converting umax_value to int cannot overflow. */ #define BPF_MAX_VAR_SIZ (1 << 29) +/* size of type_str_buf in bpf_verifier. */ +#define TYPE_STR_BUF_LEN 64 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that @@ -484,6 +486,8 @@ struct bpf_verifier_env { /* Same as scratched_regs but for stack slots */ u64 scratched_stack_slots; u32 prev_log_len, prev_insn_print_len; + /* buffer used in reg_type_str() to generate reg_type string */ + char type_str_buf[TYPE_STR_BUF_LEN]; }; __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, -- cgit v1.2.3 From 20b2aff4bc15bda809f994761d5719827d66c0b4 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 16 Dec 2021 16:31:48 -0800 Subject: bpf: Introduce MEM_RDONLY flag This patch introduce a flag MEM_RDONLY to tag a reg value pointing to read-only memory. It makes the following changes: 1. PTR_TO_RDWR_BUF -> PTR_TO_BUF 2. PTR_TO_RDONLY_BUF -> PTR_TO_BUF | MEM_RDONLY Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211217003152.48334-6-haoluo@google.com --- include/linux/bpf.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c3de62267b84..126048110bdb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -311,7 +311,10 @@ enum bpf_type_flag { /* PTR may be NULL. */ PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = PTR_MAYBE_NULL, + /* MEM is read-only. */ + MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_RDONLY, }; /* Max number of base types. */ @@ -492,8 +495,7 @@ enum bpf_reg_type { * an explicit null check is required for this struct. */ PTR_TO_MEM, /* reg points to valid memory region */ - PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ - PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ + PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ PTR_TO_FUNC, /* reg points to a bpf program function */ __BPF_REG_TYPE_MAX, -- cgit v1.2.3 From cf9f2f8d62eca810afbd1ee6cc0800202b000e57 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 16 Dec 2021 16:31:49 -0800 Subject: bpf: Convert PTR_TO_MEM_OR_NULL to composable types. Remove PTR_TO_MEM_OR_NULL and replace it with PTR_TO_MEM combined with flag PTR_MAYBE_NULL. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211217003152.48334-7-haoluo@google.com --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 126048110bdb..567d83bf28f9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -506,7 +506,6 @@ enum bpf_reg_type { PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, - PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MEM, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. -- cgit v1.2.3 From 216e3cd2f28dbbf1fe86848e0e29e6693b9f0a20 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 16 Dec 2021 16:31:51 -0800 Subject: bpf: Add MEM_RDONLY for helper args that are pointers to rdonly mem. Some helper functions may modify its arguments, for example, bpf_d_path, bpf_get_stack etc. Previously, their argument types were marked as ARG_PTR_TO_MEM, which is compatible with read-only mem types, such as PTR_TO_RDONLY_BUF. Therefore it's legitimate, but technically incorrect, to modify a read-only memory by passing it into one of such helper functions. This patch tags the bpf_args compatible with immutable memory with MEM_RDONLY flag. The arguments that don't have this flag will be only compatible with mutable memory types, preventing the helper from modifying a read-only memory. The bpf_args that have MEM_RDONLY are compatible with both mutable memory and immutable memory. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211217003152.48334-9-haoluo@google.com --- include/linux/bpf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 567d83bf28f9..26753139d5b4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -311,7 +311,9 @@ enum bpf_type_flag { /* PTR may be NULL. */ PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), - /* MEM is read-only. */ + /* MEM is read-only. When applied on bpf_arg, it indicates the arg is + * compatible with both mutable and immutable memory. + */ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), __BPF_TYPE_LAST_FLAG = MEM_RDONLY, -- cgit v1.2.3 From 5a9959008fb63191538ab0f7800f4cf26afe7750 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:19 +0100 Subject: flow_offload: add index to flow_action_entry structure Add index to flow_action_entry structure and delete index from police and gate child structure. We make this change to offload tc action for driver to identify a tc action. Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/flow_offload.h | 3 +-- include/net/tc_act/tc_gate.h | 5 ----- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3961461d9c8b..2271da5aa8ee 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -197,6 +197,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie); struct flow_action_entry { enum flow_action_id id; + u32 hw_index; enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; @@ -232,7 +233,6 @@ struct flow_action_entry { bool truncate; } sample; struct { /* FLOW_ACTION_POLICE */ - u32 index; u32 burst; u64 rate_bytes_ps; u64 burst_pkt; @@ -267,7 +267,6 @@ struct flow_action_entry { u8 ttl; } mpls_mangle; struct { - u32 index; s32 prio; u64 basetime; u64 cycletime; diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index 8bc6be81a7ad..c8fa11ebb397 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -60,11 +60,6 @@ static inline bool is_tcf_gate(const struct tc_action *a) return false; } -static inline u32 tcf_gate_index(const struct tc_action *a) -{ - return a->tcfa_index; -} - static inline s32 tcf_gate_prio(const struct tc_action *a) { s32 tcfg_prio; -- cgit v1.2.3 From 9c1c0e124ca25589e6cf040e105ab0857f9e9c3e Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:20 +0100 Subject: flow_offload: rename offload functions with offload instead of flow To improves readability, we rename offload functions with offload instead of flow. The term flow is related to exact matches, so we rename these functions with offload. We make this change to facilitate single action offload functions naming. Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index cebc1bd713b6..5d4ff76d37e2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -536,9 +536,9 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) return ifindex == skb->skb_iif; } -int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts); -void tc_cleanup_flow_action(struct flow_action *flow_action); +int tc_setup_offload_action(struct flow_action *flow_action, + const struct tcf_exts *exts); +void tc_cleanup_offload_action(struct flow_action *flow_action); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); -- cgit v1.2.3 From c54e1d920f04d528ab558f09326a78d2ae59e323 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:21 +0100 Subject: flow_offload: add ops to tc_action_ops for flow action setup Add a new ops to tc_action_ops for flow action setup. Refactor function tc_setup_flow_action to use this new ops. We make this change to facilitate to add standalone action module. We will also use this ops to offload action independent of filter in following patch. Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/act_api.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index b5b624c7e488..b418bb0e44e0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -88,6 +88,16 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) dtm->expires = jiffies_to_clock_t(stm->expires); } +static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) +{ + if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) + return FLOW_ACTION_HW_STATS_DONT_CARE; + else if (!hw_stats) + return FLOW_ACTION_HW_STATS_DISABLED; + + return hw_stats; +} + #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 @@ -121,6 +131,8 @@ struct tc_action_ops { struct psample_group * (*get_psample_group)(const struct tc_action *a, tc_action_priv_destructor *destructor); + int (*offload_act_setup)(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind); }; struct tc_action_net { -- cgit v1.2.3 From 8cbfe939abe905280279e84a297b1cb34e0d0ec9 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:22 +0100 Subject: flow_offload: allow user to offload tc action to net device Use flow_indr_dev_register/flow_indr_dev_setup_offload to offload tc action. We need to call tc_cleanup_flow_action to clean up tc action entry since in tc_setup_action, some actions may hold dev refcnt, especially the mirror action. Signed-off-by: Baowen Zheng Signed-off-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/flow_offload.h | 17 +++++++++++++++++ include/net/pkt_cls.h | 5 +++++ 3 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a419718612c6..8b0bdeb4734e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -920,6 +920,7 @@ enum tc_setup_type { TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, TC_SETUP_QDISC_HTB, + TC_SETUP_ACT, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 2271da5aa8ee..5b8c54eb7a6b 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -551,6 +551,23 @@ struct flow_cls_offload { u32 classid; }; +enum offload_act_command { + FLOW_ACT_REPLACE, + FLOW_ACT_DESTROY, + FLOW_ACT_STATS, +}; + +struct flow_offload_action { + struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS process*/ + enum offload_act_command command; + enum flow_action_id id; + u32 index; + struct flow_stats stats; + struct flow_action action; +}; + +struct flow_offload_action *offload_action_alloc(unsigned int num_actions); + static inline struct flow_rule * flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd) { diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 5d4ff76d37e2..1bfb616ea759 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -262,6 +262,9 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) for (; 0; (void)(i), (void)(a), (void)(exts)) #endif +#define tcf_act_for_each_action(i, a, actions) \ + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++) + static inline void tcf_exts_stats_update(const struct tcf_exts *exts, u64 bytes, u64 packets, u64 drops, u64 lastuse, @@ -539,6 +542,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) int tc_setup_offload_action(struct flow_action *flow_action, const struct tcf_exts *exts); void tc_cleanup_offload_action(struct flow_action *flow_action); +int tc_setup_action(struct flow_action *flow_action, + struct tc_action *actions[]); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); -- cgit v1.2.3 From 7adc576512110ef32b0424a727ee1d04359fc205 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:23 +0100 Subject: flow_offload: add skip_hw and skip_sw to control if offload the action We add skip_hw and skip_sw for user to control if offload the action to hardware. We also add in_hw_count for user to indicate if the action is offloaded to any hardware. Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 1 + include/uapi/linux/pkt_cls.h | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index b418bb0e44e0..15c6a881817d 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -44,6 +44,7 @@ struct tc_action { u8 hw_stats; u8 used_hw_stats; bool used_hw_stats_valid; + u32 in_hw_count; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 6836ccb9c45d..ee38b35c3f57 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -19,13 +19,16 @@ enum { TCA_ACT_FLAGS, TCA_ACT_HW_STATS, TCA_ACT_USED_HW_STATS, + TCA_ACT_IN_HW_COUNT, __TCA_ACT_MAX }; /* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */ -#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for - * actions stats. - */ +#define TCA_ACT_FLAGS_NO_PERCPU_STATS (1 << 0) /* Don't use percpu allocator for + * actions stats. + */ +#define TCA_ACT_FLAGS_SKIP_HW (1 << 1) /* don't offload action to HW */ +#define TCA_ACT_FLAGS_SKIP_SW (1 << 2) /* don't use action in SW */ /* tca HW stats type * When user does not pass the attribute, he does not care. -- cgit v1.2.3 From bcd64368584bab38bdd095c88df702fb64271694 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:24 +0100 Subject: flow_offload: rename exts stats update functions with hw Rename exts stats update functions with hw for readability. We make this change also to update stats from hw for an action when it is offloaded to hw as a single action. Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 1bfb616ea759..efdfab8eb00c 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -266,9 +266,9 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++) static inline void -tcf_exts_stats_update(const struct tcf_exts *exts, - u64 bytes, u64 packets, u64 drops, u64 lastuse, - u8 used_hw_stats, bool used_hw_stats_valid) +tcf_exts_hw_stats_update(const struct tcf_exts *exts, + u64 bytes, u64 packets, u64 drops, u64 lastuse, + u8 used_hw_stats, bool used_hw_stats_valid) { #ifdef CONFIG_NET_CLS_ACT int i; -- cgit v1.2.3 From c7a66f8d8a946edafb38150480145ab9801e4e52 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:25 +0100 Subject: flow_offload: add process to update action stats from hardware When collecting stats for actions update them using both hardware and software counters. Stats update process should not run in context of preempt_disable. Signed-off-by: Baowen Zheng Signed-off-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/act_api.h | 1 + include/net/pkt_cls.h | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 15c6a881817d..20104dfdd57c 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -253,6 +253,7 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); +int tcf_action_update_hw_stats(struct tc_action *action); int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct tcf_chain **handle, struct netlink_ext_ack *newchain); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index efdfab8eb00c..337a3ebb4666 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -273,18 +273,20 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts, #ifdef CONFIG_NET_CLS_ACT int i; - preempt_disable(); - for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - tcf_action_stats_update(a, bytes, packets, drops, - lastuse, true); - a->used_hw_stats = used_hw_stats; - a->used_hw_stats_valid = used_hw_stats_valid; - } + /* if stats from hw, just skip */ + if (tcf_action_update_hw_stats(a)) { + preempt_disable(); + tcf_action_stats_update(a, bytes, packets, drops, + lastuse, true); + preempt_enable(); - preempt_enable(); + a->used_hw_stats = used_hw_stats; + a->used_hw_stats_valid = used_hw_stats_valid; + } + } #endif } -- cgit v1.2.3 From 13926d19a11e303f12571df61b7bb64f17cb4561 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:27 +0100 Subject: flow_offload: add reoffload process to update hw_count Add reoffload process to update hw_count when driver is inserted or removed. We will delete the action if it is with skip_sw flag and not offloaded to any hardware in reoffload process. When reoffloading actions, we still offload the actions that are added independent of filters. Signed-off-by: Baowen Zheng Signed-off-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/act_api.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 20104dfdd57c..0f5f69deb3ce 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -254,6 +255,8 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); int tcf_action_update_hw_stats(struct tc_action *action); +int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add); int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct tcf_chain **handle, struct netlink_ext_ack *newchain); @@ -265,6 +268,14 @@ DECLARE_STATIC_KEY_FALSE(tcf_frag_xmit_count); #endif int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); + +#else /* !CONFIG_NET_CLS_ACT */ + +static inline int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add) { + return 0; +} + #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, -- cgit v1.2.3 From c86e0209dc7725c91583e3c0c78c3da6a28daeb4 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:28 +0100 Subject: flow_offload: validate flags of filter and actions Add process to validate flags of filter and actions when adding a tc filter. We need to prevent adding filter with flags conflicts with its actions. Signed-off-by: Baowen Zheng Signed-off-by: Louis Peens Signed-off-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 2 +- include/net/pkt_cls.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 0f5f69deb3ce..3049cb69c025 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -203,7 +203,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, - u32 flags, struct netlink_ext_ack *extack); + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 337a3ebb4666..ebef45e821af 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -330,6 +330,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack); +int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb, + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); -- cgit v1.2.3 From a949f2cf1ab9b3afd894427a64fce24fd8bae0a6 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Fri, 17 Dec 2021 18:15:43 +0200 Subject: dt-bindings: clock: Add bindings for Exynos850 sysreg clocks System Register is used to configure system behavior, like USI protocol, etc. SYSREG clocks should be provided to corresponding syscon nodes, to make it possible to modify SYSREG registers. While at it, add also missing PMU and GPIO clocks, which looks necessary and might be needed for corresponding Exynos850 features soon. Signed-off-by: Sam Protsenko Signed-off-by: Sylwester Nawrocki Reviewed-by: Krzysztof Kozlowski Acked-by: Rob Herring Acked-by: Chanwoo Choi Link: https://lore.kernel.org/r/20211217161549.24836-2-semen.protsenko@linaro.org --- include/dt-bindings/clock/exynos850.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/exynos850.h b/include/dt-bindings/clock/exynos850.h index 8aa5e82af0d3..0b6a3c6a7c90 100644 --- a/include/dt-bindings/clock/exynos850.h +++ b/include/dt-bindings/clock/exynos850.h @@ -82,7 +82,10 @@ #define CLK_GOUT_I3C_PCLK 19 #define CLK_GOUT_I3C_SCLK 20 #define CLK_GOUT_SPEEDY_PCLK 21 -#define APM_NR_CLK 22 +#define CLK_GOUT_GPIO_ALIVE_PCLK 22 +#define CLK_GOUT_PMU_ALIVE_PCLK 23 +#define CLK_GOUT_SYSREG_APM_PCLK 24 +#define APM_NR_CLK 25 /* CMU_CMGP */ #define CLK_RCO_CMGP 1 @@ -99,7 +102,8 @@ #define CLK_GOUT_CMGP_USI0_PCLK 12 #define CLK_GOUT_CMGP_USI1_IPCLK 13 #define CLK_GOUT_CMGP_USI1_PCLK 14 -#define CMGP_NR_CLK 15 +#define CLK_GOUT_SYSREG_CMGP_PCLK 15 +#define CMGP_NR_CLK 16 /* CMU_HSI */ #define CLK_MOUT_HSI_BUS_USER 1 @@ -167,7 +171,9 @@ #define CLK_GOUT_MMC_EMBD_SDCLKIN 10 #define CLK_GOUT_SSS_ACLK 11 #define CLK_GOUT_SSS_PCLK 12 -#define CORE_NR_CLK 13 +#define CLK_GOUT_GPIO_CORE_PCLK 13 +#define CLK_GOUT_SYSREG_CORE_PCLK 14 +#define CORE_NR_CLK 15 /* CMU_DPU */ #define CLK_MOUT_DPU_USER 1 -- cgit v1.2.3 From 591020a516720e9eba1c4b1748cb73b6748e445f Mon Sep 17 00:00:00 2001 From: David Virag Date: Mon, 6 Dec 2021 16:31:15 +0100 Subject: dt-bindings: clock: Add bindings definitions for Exynos7885 CMU Just like on Exynos850, the clock controller driver is designed to have separate instances for each particular CMU, so clock IDs start from 1 for each CMU in this bindings header too. Signed-off-by: David Virag Signed-off-by: Sylwester Nawrocki Reviewed-by: Krzysztof Kozlowski Reviewed-by: Sam Protsenko Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211206153124.427102-2-virag.david003@gmail.com --- include/dt-bindings/clock/exynos7885.h | 115 +++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 include/dt-bindings/clock/exynos7885.h (limited to 'include') diff --git a/include/dt-bindings/clock/exynos7885.h b/include/dt-bindings/clock/exynos7885.h new file mode 100644 index 000000000000..1f8701691d62 --- /dev/null +++ b/include/dt-bindings/clock/exynos7885.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021 Dávid Virág + * + * Device Tree binding constants for Exynos7885 clock controller. + */ + +#ifndef _DT_BINDINGS_CLOCK_EXYNOS_7885_H +#define _DT_BINDINGS_CLOCK_EXYNOS_7885_H + +/* CMU_TOP */ +#define CLK_FOUT_SHARED0_PLL 1 +#define CLK_FOUT_SHARED1_PLL 2 +#define CLK_DOUT_SHARED0_DIV2 3 +#define CLK_DOUT_SHARED0_DIV3 4 +#define CLK_DOUT_SHARED0_DIV4 5 +#define CLK_DOUT_SHARED0_DIV5 6 +#define CLK_DOUT_SHARED1_DIV2 7 +#define CLK_DOUT_SHARED1_DIV3 8 +#define CLK_DOUT_SHARED1_DIV4 9 +#define CLK_MOUT_CORE_BUS 10 +#define CLK_MOUT_CORE_CCI 11 +#define CLK_MOUT_CORE_G3D 12 +#define CLK_DOUT_CORE_BUS 13 +#define CLK_DOUT_CORE_CCI 14 +#define CLK_DOUT_CORE_G3D 15 +#define CLK_GOUT_CORE_BUS 16 +#define CLK_GOUT_CORE_CCI 17 +#define CLK_GOUT_CORE_G3D 18 +#define CLK_MOUT_PERI_BUS 19 +#define CLK_MOUT_PERI_SPI0 20 +#define CLK_MOUT_PERI_SPI1 21 +#define CLK_MOUT_PERI_UART0 22 +#define CLK_MOUT_PERI_UART1 23 +#define CLK_MOUT_PERI_UART2 24 +#define CLK_MOUT_PERI_USI0 25 +#define CLK_MOUT_PERI_USI1 26 +#define CLK_MOUT_PERI_USI2 27 +#define CLK_DOUT_PERI_BUS 28 +#define CLK_DOUT_PERI_SPI0 29 +#define CLK_DOUT_PERI_SPI1 30 +#define CLK_DOUT_PERI_UART0 31 +#define CLK_DOUT_PERI_UART1 32 +#define CLK_DOUT_PERI_UART2 33 +#define CLK_DOUT_PERI_USI0 34 +#define CLK_DOUT_PERI_USI1 35 +#define CLK_DOUT_PERI_USI2 36 +#define CLK_GOUT_PERI_BUS 37 +#define CLK_GOUT_PERI_SPI0 38 +#define CLK_GOUT_PERI_SPI1 39 +#define CLK_GOUT_PERI_UART0 40 +#define CLK_GOUT_PERI_UART1 41 +#define CLK_GOUT_PERI_UART2 42 +#define CLK_GOUT_PERI_USI0 43 +#define CLK_GOUT_PERI_USI1 44 +#define CLK_GOUT_PERI_USI2 45 +#define TOP_NR_CLK 46 + +/* CMU_CORE */ +#define CLK_MOUT_CORE_BUS_USER 1 +#define CLK_MOUT_CORE_CCI_USER 2 +#define CLK_MOUT_CORE_G3D_USER 3 +#define CLK_MOUT_CORE_GIC 4 +#define CLK_DOUT_CORE_BUSP 5 +#define CLK_GOUT_CCI_ACLK 6 +#define CLK_GOUT_GIC400_CLK 7 +#define CORE_NR_CLK 8 + +/* CMU_PERI */ +#define CLK_MOUT_PERI_BUS_USER 1 +#define CLK_MOUT_PERI_SPI0_USER 2 +#define CLK_MOUT_PERI_SPI1_USER 3 +#define CLK_MOUT_PERI_UART0_USER 4 +#define CLK_MOUT_PERI_UART1_USER 5 +#define CLK_MOUT_PERI_UART2_USER 6 +#define CLK_MOUT_PERI_USI0_USER 7 +#define CLK_MOUT_PERI_USI1_USER 8 +#define CLK_MOUT_PERI_USI2_USER 9 +#define CLK_GOUT_GPIO_TOP_PCLK 10 +#define CLK_GOUT_HSI2C0_PCLK 11 +#define CLK_GOUT_HSI2C1_PCLK 12 +#define CLK_GOUT_HSI2C2_PCLK 13 +#define CLK_GOUT_HSI2C3_PCLK 14 +#define CLK_GOUT_I2C0_PCLK 15 +#define CLK_GOUT_I2C1_PCLK 16 +#define CLK_GOUT_I2C2_PCLK 17 +#define CLK_GOUT_I2C3_PCLK 18 +#define CLK_GOUT_I2C4_PCLK 19 +#define CLK_GOUT_I2C5_PCLK 20 +#define CLK_GOUT_I2C6_PCLK 21 +#define CLK_GOUT_I2C7_PCLK 22 +#define CLK_GOUT_PWM_MOTOR_PCLK 23 +#define CLK_GOUT_SPI0_PCLK 24 +#define CLK_GOUT_SPI0_EXT_CLK 25 +#define CLK_GOUT_SPI1_PCLK 26 +#define CLK_GOUT_SPI1_EXT_CLK 27 +#define CLK_GOUT_UART0_EXT_UCLK 28 +#define CLK_GOUT_UART0_PCLK 29 +#define CLK_GOUT_UART1_EXT_UCLK 30 +#define CLK_GOUT_UART1_PCLK 31 +#define CLK_GOUT_UART2_EXT_UCLK 32 +#define CLK_GOUT_UART2_PCLK 33 +#define CLK_GOUT_USI0_PCLK 34 +#define CLK_GOUT_USI0_SCLK 35 +#define CLK_GOUT_USI1_PCLK 36 +#define CLK_GOUT_USI1_SCLK 37 +#define CLK_GOUT_USI2_PCLK 38 +#define CLK_GOUT_USI2_SCLK 39 +#define CLK_GOUT_MCT_PCLK 40 +#define CLK_GOUT_SYSREG_PERI_PCLK 41 +#define CLK_GOUT_WDT0_PCLK 42 +#define CLK_GOUT_WDT1_PCLK 43 +#define PERI_NR_CLK 44 + +#endif /* _DT_BINDINGS_CLOCK_EXYNOS_7885_H */ -- cgit v1.2.3 From 7d4203c13435c0bdae61bf16bbd0408d5b958ade Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 25 Nov 2021 18:15:37 +0100 Subject: mm: add virt_to_folio() and folio_address() These two wrappers around their respective struct page variants will be useful in the following patches. Signed-off-by: Vlastimil Babka Acked-by: Johannes Weiner Reviewed-by: Roman Gushchin --- include/linux/mm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a7e4a9e7d807..4a6cf22483da 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -863,6 +863,13 @@ static inline struct page *virt_to_head_page(const void *x) return compound_head(page); } +static inline struct folio *virt_to_folio(const void *x) +{ + struct page *page = virt_to_page(x); + + return page_folio(page); +} + void __put_page(struct page *page); void put_pages_list(struct list_head *pages); @@ -1753,6 +1760,11 @@ void page_address_init(void); #define page_address_init() do { } while(0) #endif +static inline void *folio_address(const struct folio *folio) +{ + return page_address(&folio->page); +} + extern void *page_rmapping(struct page *page); extern struct anon_vma *page_anon_vma(struct page *page); extern pgoff_t __page_file_index(struct page *page); -- cgit v1.2.3 From d5c383f2c98ac58c210b266cdaf7b86bc32d1ad1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 17 Dec 2021 15:30:56 +0000 Subject: iommu/iova: Squash entry_dtor abstraction All flush queues are driven by iommu-dma now, so there is no need to abstract entry_dtor or its data any more. Squash the now-canonical implementation directly into the IOVA code to get it out of the way. Reviewed-by: John Garry Reviewed-by: Christoph Hellwig Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/2260f8de00ab5e0f9d2a1cf8978e6ae7cd4f182c.1639753638.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iova.h | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 71d8a2de6635..e746d8e41449 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -40,9 +40,6 @@ struct iova_domain; /* Call-Back from IOVA code into IOMMU drivers */ typedef void (* iova_flush_cb)(struct iova_domain *domain); -/* Destructor for per-entry data */ -typedef void (* iova_entry_dtor)(unsigned long data); - /* Number of entries per Flush Queue */ #define IOVA_FQ_SIZE 256 @@ -53,7 +50,7 @@ typedef void (* iova_entry_dtor)(unsigned long data); struct iova_fq_entry { unsigned long iova_pfn; unsigned long pages; - unsigned long data; + struct page *freelist; u64 counter; /* Flush counter when this entrie was added */ }; @@ -88,9 +85,6 @@ struct iova_domain { iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU TLBs */ - iova_entry_dtor entry_dtor; /* IOMMU driver specific destructor for - iova entry */ - struct timer_list fq_timer; /* Timer to regularily empty the flush-queues */ atomic_t fq_timer_on; /* 1 when timer is active, 0 @@ -146,15 +140,14 @@ void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size); void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, - unsigned long data); + struct page *freelist); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); -int init_iova_flush_queue(struct iova_domain *iovad, - iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); +int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); #else @@ -189,12 +182,6 @@ static inline void free_iova_fast(struct iova_domain *iovad, { } -static inline void queue_iova(struct iova_domain *iovad, - unsigned long pfn, unsigned long pages, - unsigned long data) -{ -} - static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, @@ -216,13 +203,6 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } -static inline int init_iova_flush_queue(struct iova_domain *iovad, - iova_flush_cb flush_cb, - iova_entry_dtor entry_dtor) -{ - return -ENODEV; -} - static inline struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) { -- cgit v1.2.3 From 649ad9835a3783bcb6c69368fa939e0010abb2c6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 17 Dec 2021 15:30:57 +0000 Subject: iommu/iova: Squash flush_cb abstraction Once again, with iommu-dma now being the only flush queue user, we no longer need the extra level of indirection through flush_cb. Squash that and let the flush queue code call the domain method directly. This does mean temporarily having to carry an additional copy of the IOMMU domain pointer around instead, but only until a later patch untangles it again. Reviewed-by: John Garry Reviewed-by: Christoph Hellwig Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/e3f9b4acdd6640012ef4fbc819ac868d727b64a9.1639753638.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iova.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index e746d8e41449..99be4fcea4f3 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -14,6 +14,7 @@ #include #include #include +#include /* iova structure */ struct iova { @@ -35,11 +36,6 @@ struct iova_rcache { struct iova_cpu_rcache __percpu *cpu_rcaches; }; -struct iova_domain; - -/* Call-Back from IOVA code into IOMMU drivers */ -typedef void (* iova_flush_cb)(struct iova_domain *domain); - /* Number of entries per Flush Queue */ #define IOVA_FQ_SIZE 256 @@ -82,8 +78,7 @@ struct iova_domain { struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ - iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU - TLBs */ + struct iommu_domain *fq_domain; struct timer_list fq_timer; /* Timer to regularily empty the flush-queues */ @@ -147,7 +142,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); -int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb); +int init_iova_flush_queue(struct iova_domain *iovad, struct iommu_domain *fq_domain); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); #else -- cgit v1.2.3 From 87f60cc65d24939353b40aa1d9297fea080cdf8d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 17 Dec 2021 15:31:00 +0000 Subject: iommu/vt-d: Use put_pages_list page->freelist is for the use of slab. We already have the ability to free a list of pages in the core mm, but it requires the use of a list_head and for the pages to be chained together through page->lru. Switch the Intel IOMMU and IOVA code over to using free_pages_list(). Signed-off-by: Matthew Wilcox (Oracle) [rm: split from original patch, cosmetic tweaks, fix fq entries] Signed-off-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/2115b560d9a0ce7cd4b948bd51a2b7bde8fdfd59.1639753638.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 ++- include/linux/iova.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d2f3435e7d17..de0c57a567c8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -186,7 +186,7 @@ struct iommu_iotlb_gather { unsigned long start; unsigned long end; size_t pgsize; - struct page *freelist; + struct list_head freelist; bool queued; }; @@ -399,6 +399,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { .start = ULONG_MAX, + .freelist = LIST_HEAD_INIT(gather->freelist), }; } diff --git a/include/linux/iova.h b/include/linux/iova.h index 99be4fcea4f3..072a09c06e8a 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -46,7 +46,7 @@ struct iova_rcache { struct iova_fq_entry { unsigned long iova_pfn; unsigned long pages; - struct page *freelist; + struct list_head freelist; u64 counter; /* Flush counter when this entrie was added */ }; @@ -135,7 +135,7 @@ void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size); void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, - struct page *freelist); + struct list_head *freelist); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, -- cgit v1.2.3 From a17e3026bc4da9135ca9a42ec0b1fa67f95172e3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 17 Dec 2021 15:31:03 +0000 Subject: iommu: Move flush queue data into iommu_dma_cookie Complete the move into iommu-dma by refactoring the flush queues themselves to belong to the DMA cookie rather than the IOVA domain. The refactoring may as well extend to some minor cosmetic aspects too, to help us stay one step ahead of the style police. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/24304722005bc6f144e2a1fdd865d1465722fc2e.1639753638.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iova.h | 44 +------------------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 072a09c06e8a..0abd48c5e622 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include /* iova structure */ struct iova { @@ -36,27 +33,6 @@ struct iova_rcache { struct iova_cpu_rcache __percpu *cpu_rcaches; }; -/* Number of entries per Flush Queue */ -#define IOVA_FQ_SIZE 256 - -/* Timeout (in ms) after which entries are flushed from the Flush-Queue */ -#define IOVA_FQ_TIMEOUT 10 - -/* Flush Queue entry for defered flushing */ -struct iova_fq_entry { - unsigned long iova_pfn; - unsigned long pages; - struct list_head freelist; - u64 counter; /* Flush counter when this entrie was added */ -}; - -/* Per-CPU Flush Queue structure */ -struct iova_fq { - struct iova_fq_entry entries[IOVA_FQ_SIZE]; - unsigned head, tail; - spinlock_t lock; -}; - /* holds all the iova translations for a domain */ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ @@ -67,23 +43,9 @@ struct iova_domain { unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; unsigned long max32_alloc_size; /* Size of last failed allocation */ - struct iova_fq __percpu *fq; /* Flush Queue */ - - atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that - have been started */ - - atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that - have been finished */ - struct iova anchor; /* rbtree lookup anchor */ - struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ - - struct iommu_domain *fq_domain; - struct timer_list fq_timer; /* Timer to regularily empty the - flush-queues */ - atomic_t fq_timer_on; /* 1 when timer is active, 0 - when not */ + struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ struct hlist_node cpuhp_dead; }; @@ -133,16 +95,12 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, bool size_aligned); void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size); -void queue_iova(struct iova_domain *iovad, - unsigned long pfn, unsigned long pages, - struct list_head *freelist); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); -int init_iova_flush_queue(struct iova_domain *iovad, struct iommu_domain *fq_domain); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); #else -- cgit v1.2.3 From 51b1a5729469cef57a3c97aa014aa6e1d2b8d864 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Fri, 17 Dec 2021 18:15:47 +0200 Subject: dt-bindings: pinctrl: samsung: Add pin drive definitions for Exynos850 All Exynos850 GPIO blocks can use EXYNOS5420_PIN_DRV* definitions, except GPIO_HSI block. Add pin drive strength definitions for GPIO_HSI block correspondingly. Signed-off-by: Sam Protsenko Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211217161549.24836-6-semen.protsenko@linaro.org Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/pinctrl/samsung.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/pinctrl/samsung.h b/include/dt-bindings/pinctrl/samsung.h index b1832506b923..950970634dfe 100644 --- a/include/dt-bindings/pinctrl/samsung.h +++ b/include/dt-bindings/pinctrl/samsung.h @@ -36,7 +36,10 @@ #define EXYNOS5260_PIN_DRV_LV4 2 #define EXYNOS5260_PIN_DRV_LV6 3 -/* Drive strengths for Exynos5410, Exynos542x and Exynos5800 */ +/* + * Drive strengths for Exynos5410, Exynos542x, Exynos5800 and Exynos850 (except + * GPIO_HSI block) + */ #define EXYNOS5420_PIN_DRV_LV1 0 #define EXYNOS5420_PIN_DRV_LV2 1 #define EXYNOS5420_PIN_DRV_LV3 2 @@ -56,6 +59,14 @@ #define EXYNOS5433_PIN_DRV_SLOW_SR5 0xc #define EXYNOS5433_PIN_DRV_SLOW_SR6 0xf +/* Drive strengths for Exynos850 GPIO_HSI block */ +#define EXYNOS850_HSI_PIN_DRV_LV1 0 /* 1x */ +#define EXYNOS850_HSI_PIN_DRV_LV1_5 1 /* 1.5x */ +#define EXYNOS850_HSI_PIN_DRV_LV2 2 /* 2x */ +#define EXYNOS850_HSI_PIN_DRV_LV2_5 3 /* 2.5x */ +#define EXYNOS850_HSI_PIN_DRV_LV3 4 /* 3x */ +#define EXYNOS850_HSI_PIN_DRV_LV4 5 /* 4x */ + #define EXYNOS_PIN_FUNC_INPUT 0 #define EXYNOS_PIN_FUNC_OUTPUT 1 #define EXYNOS_PIN_FUNC_2 2 -- cgit v1.2.3 From 28f350a67d291575492057c92ceb8518ecbace95 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 29 Nov 2021 15:32:41 +0200 Subject: cfg80211: Fix order of enum nl80211_band_iftype_attr documentation And fix the comment. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.4ef43aff0c5d.I96dcb743bcd4f387ba4cfaa61987aeb642ad762b@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3e734826792f..bf69ecbc1c24 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3747,13 +3747,12 @@ enum nl80211_mpath_info { * capabilities IE * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as * defined in HE capabilities IE - * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently - * defined * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16), * given for all 6 GHz band channels * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are * advertised on this band/for this iftype (binary) * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use + * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined */ enum nl80211_band_iftype_attr { __NL80211_BAND_IFTYPE_ATTR_INVALID, -- cgit v1.2.3 From 6d501764288cf7869c7f54f1fcabd77bcd91b90e Mon Sep 17 00:00:00 2001 From: Nathan Errera Date: Mon, 29 Nov 2021 15:32:38 +0200 Subject: mac80211: introduce channel switch disconnect function Introduce a disconnect function that can be used when a channel switch error occurs. The channel switch can request to block the tx, and so, we need to make sure we do not send a deauth frame in this case. Signed-off-by: Nathan Errera Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.cd2a615a0702.I9edb14785586344af17644b610ab5be109dcef00@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e349f57d19ae..28e66cdae3ec 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6075,6 +6075,18 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw); */ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +/** + * ieee80211_channel_switch_disconnect - disconnect due to channel switch error + * @vif &struct ieee80211_vif pointer from the add_interface callback. + * @block_tx: if %true, do not send deauth frame. + * + * Instruct mac80211 to disconnect due to a channel switch error. The channel + * switch can request to block the tx and so, we need to make sure we do not send + * a deauth frame in this case. + */ +void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, + bool block_tx); + /** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. -- cgit v1.2.3 From a083ee8a4e03348fb90a4b24cbe957b3252c7b04 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 29 Nov 2021 15:32:34 +0200 Subject: cfg80211: Add support for notifying association comeback Thought the underline driver MLME can handle association temporal rejection with comeback, it is still useful to notify this to user space, as user space might want to handle the temporal rejection differently. For example, in case the comeback time is too long, user space can deauthenticate immediately and try to associate with a different AP. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.2467809e8cb3.I45574185b582666bc78eef0c29a4c36b478e5382@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ include/uapi/linux/nl80211.h | 7 +++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a887086cb103..c9a9073d4c2a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8287,6 +8287,18 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, bool is_4addr, u8 check_swif); +/** + * cfg80211_assoc_comeback - notification of association that was + * temporarly rejected with a comeback + * @netdev: network device + * @bss: the bss entry with which association is in progress. + * @timeout: timeout interval value TUs. + * + * this function may sleep. the caller must hold the corresponding wdev's mutex. + */ +void cfg80211_assoc_comeback(struct net_device *netdev, + struct cfg80211_bss *bss, u32 timeout); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index bf69ecbc1c24..a8e20d25252b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1232,6 +1232,11 @@ * &NL80211_ATTR_FILS_NONCES - for FILS Nonces * (STA Nonce 16 bytes followed by AP Nonce 16 bytes) * + * @NL80211_CMD_ASSOC_COMEBACK: notification about an association + * temporal rejection with comeback. The event includes %NL80211_ATTR_MAC + * to describe the BSSID address of the AP and %NL80211_ATTR_TIMEOUT to + * specify the timeout value. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1474,6 +1479,8 @@ enum nl80211_commands { NL80211_CMD_SET_FILS_AAD, + NL80211_CMD_ASSOC_COMEBACK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From a95bfb876fa87e2d0fa718ee61a8030ddf162d2b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 29 Nov 2021 14:11:24 +0100 Subject: cfg80211: rename offchannel_chain structs to background_chain to avoid confusion with ETSI standard ETSI standard defines "Offchannel CAC" as: "Off-Channel CAC is performed by a number of non-continuous checks spread over a period in time. This period, which is required to determine the presence of radar signals, is defined as the Off-Channel CAC Time.. Minimum Off-Channel CAC Time 6 minutes and Maximum Off-Channel CAC Time 4 hours..". mac80211 implementation refers to a dedicated hw chain used for continuous radar monitoring. Rename offchannel_* references to background_* in order to avoid confusion with ETSI standard. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/4204cc1d648d76b44557981713231e030a3bd991.1638190762.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 20 ++++++++++---------- include/net/mac80211.h | 10 +++++----- include/uapi/linux/nl80211.h | 14 +++++++------- 3 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c9a9073d4c2a..e29d904eccff 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4073,14 +4073,14 @@ struct mgmt_frame_regs { * those to decrypt (Re)Association Request and encrypt (Re)Association * Response frame. * - * @set_radar_offchan: Configure dedicated offchannel chain available for + * @set_radar_background: Configure dedicated offchannel chain available for * radar/CAC detection on some hw. This chain can't be used to transmit * or receive frames and it is bounded to a running wdev. - * Offchannel radar/CAC detection allows to avoid the CAC downtime + * Background radar/CAC detection allows to avoid the CAC downtime * switching to a different channel during CAC detection on the selected * radar channel. * The caller is expected to set chandef pointer to NULL in order to - * disable offchannel CAC/radar detection. + * disable background CAC/radar detection. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4413,8 +4413,8 @@ struct cfg80211_ops { struct cfg80211_color_change_settings *params); int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_fils_aad *fils_aad); - int (*set_radar_offchan)(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef); + int (*set_radar_background)(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef); }; /* @@ -7626,9 +7626,9 @@ cfg80211_radar_event(struct wiphy *wiphy, } static inline void -cfg80211_offchan_radar_event(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - gfp_t gfp) +cfg80211_background_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) { __cfg80211_radar_event(wiphy, chandef, true, gfp); } @@ -7663,13 +7663,13 @@ void cfg80211_cac_event(struct net_device *netdev, enum nl80211_radar_event event, gfp_t gfp); /** - * cfg80211_offchan_cac_abort - Channel Availability Check offchan abort event + * cfg80211_background_cac_abort - Channel Availability Check offchan abort event * @wiphy: the wiphy * * This function is called by the driver when a Channel Availability Check * (CAC) is aborted by a offchannel dedicated chain. */ -void cfg80211_offchan_cac_abort(struct wiphy *wiphy); +void cfg80211_background_cac_abort(struct wiphy *wiphy); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 28e66cdae3ec..8907338d52b5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3939,14 +3939,14 @@ struct ieee80211_prep_tx_info { * twt structure. * @twt_teardown_request: Update the hw with TWT teardown request received * from the peer. - * @set_radar_offchan: Configure dedicated offchannel chain available for + * @set_radar_background: Configure dedicated offchannel chain available for * radar/CAC detection on some hw. This chain can't be used to transmit * or receive frames and it is bounded to a running wdev. - * Offchannel radar/CAC detection allows to avoid the CAC downtime + * Background radar/CAC detection allows to avoid the CAC downtime * switching to a different channel during CAC detection on the selected * radar channel. * The caller is expected to set chandef pointer to NULL in order to - * disable offchannel CAC/radar detection. + * disable background CAC/radar detection. * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to * resolve a path for hardware flow offloading */ @@ -4277,8 +4277,8 @@ struct ieee80211_ops { struct ieee80211_twt_setup *twt); void (*twt_teardown_request)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u8 flowid); - int (*set_radar_offchan)(struct ieee80211_hw *hw, - struct cfg80211_chan_def *chandef); + int (*set_radar_background)(struct ieee80211_hw *hw, + struct cfg80211_chan_def *chandef); int (*net_fill_forward_path)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a8e20d25252b..5ce20fb44f24 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2646,10 +2646,10 @@ enum nl80211_commands { * Mandatory parameter for the transmitting interface to enable MBSSID. * Optional for the non-transmitting interfaces. * - * @NL80211_ATTR_RADAR_OFFCHAN: Configure dedicated offchannel chain available for - * radar/CAC detection on some hw. This chain can't be used to transmit - * or receive frames and it is bounded to a running wdev. - * Offchannel radar/CAC detection allows to avoid the CAC downtime + * @NL80211_ATTR_RADAR_BACKGROUND: Configure dedicated offchannel chain + * available for radar/CAC detection on some hw. This chain can't be used + * to transmit or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime * switching on a different channel during CAC detection on the selected * radar channel. * @@ -3159,7 +3159,7 @@ enum nl80211_attrs { NL80211_ATTR_MBSSID_CONFIG, NL80211_ATTR_MBSSID_ELEMS, - NL80211_ATTR_RADAR_OFFCHAN, + NL80211_ATTR_RADAR_BACKGROUND, /* add attributes here, update the policy in nl80211.c */ @@ -6066,7 +6066,7 @@ enum nl80211_feature_flags { * frames. Userspace has to share FILS AAD details to the driver by using * @NL80211_CMD_SET_FILS_AAD. * - * @NL80211_EXT_FEATURE_RADAR_OFFCHAN: Device supports offchannel radar/CAC + * @NL80211_EXT_FEATURE_RADAR_BACKGROUND: Device supports background radar/CAC * detection. * * @NUM_NL80211_EXT_FEATURES: number of extended features. @@ -6135,7 +6135,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, NL80211_EXT_FEATURE_BSS_COLOR, NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, - NL80211_EXT_FEATURE_RADAR_OFFCHAN, + NL80211_EXT_FEATURE_RADAR_BACKGROUND, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 7f599aeccbd2bcba800c6c7ecc4586fd8cafc1d8 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Thu, 2 Dec 2021 14:36:10 +0200 Subject: cfg80211: Use the HE operation IE to determine a 6GHz BSS channel A non-collocated AP whose primary channel is not a PSC channel may transmit a duplicated beacon on the corresponding PSC channel in which it would indicate its true primary channel. Use this inforamtion contained in the HE operation IE to determine the primary channel of the AP. In case of invalid infomration ignore it and use the channel the frame was received on. Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211202143322.71eb2176e54e.I130f678e4aa390973ab39d838bbfe7b2d54bff8e@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e29d904eccff..b59baf9dfd67 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6385,17 +6385,6 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid, u64_to_ether_addr(new_bssid_u64, new_bssid); } -/** - * cfg80211_get_ies_channel_number - returns the channel number from ies - * @ie: IEs - * @ielen: length of IEs - * @band: enum nl80211_band of the channel - * - * Returns the channel number, or -1 if none could be determined. - */ -int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band); - /** * cfg80211_is_element_inherited - returns if element ID should be inherited * @element: element to check @@ -6431,6 +6420,19 @@ enum cfg80211_bss_frame_type { CFG80211_BSS_FTYPE_PRESP, }; +/** + * cfg80211_get_ies_channel_number - returns the channel number from ies + * @ie: IEs + * @ielen: length of IEs + * @band: enum nl80211_band of the channel + * @ftype: frame type + * + * Returns the channel number, or -1 if none could be determined. + */ +int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, + enum nl80211_band band, + enum cfg80211_bss_frame_type ftype); + /** * cfg80211_inform_bss_data - inform cfg80211 of a new BSS * -- cgit v1.2.3 From 47301a74bbfa80cef876e646a8c5fec03c20fc8d Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Fri, 26 Nov 2021 12:55:18 +0530 Subject: nl80211: Add support to set AP settings flags with single attribute In previous method each AP settings flag is represented by a top-level flag attribute and conversion to enum cfg80211_ap_settings_flags had to be done before sending them to driver. This commit is to make it easier to define new AP settings flags and sending them to driver. This commit also deprecate sending of %NL80211_ATTR_EXTERNAL_AUTH_SUPPORT in %NL80211_CMD_START_AP. But to maintain backwards compatibility checks for %NL80211_ATTR_EXTERNAL_AUTH_SUPPORT in %NL80211_CMD_START_AP when %NL80211_ATTR_AP_SETTINGS_FLAGS not present in %NL80211_CMD_START_AP. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1637911519-21306-1-git-send-email-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 ----------- include/uapi/linux/nl80211.h | 20 +++++++++++++++++++- 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b59baf9dfd67..d19e48f9fdc6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1187,17 +1187,6 @@ struct cfg80211_unsol_bcast_probe_resp { const u8 *tmpl; }; -/** - * enum cfg80211_ap_settings_flags - AP settings flags - * - * Used by cfg80211_ap_settings - * - * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication - */ -enum cfg80211_ap_settings_flags { - AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0), -}; - /** * struct cfg80211_ap_settings - AP configuration * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5ce20fb44f24..ab461b2be157 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2477,7 +2477,9 @@ enum nl80211_commands { * space supports external authentication. This attribute shall be used * with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver * may offload authentication processing to user space if this capability - * is indicated in the respective requests from the user space. + * is indicated in the respective requests from the user space. (This flag + * attribute deprecated for %NL80211_CMD_START_AP, use + * %NL80211_ATTR_AP_SETTINGS_FLAGS) * * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED. @@ -2653,6 +2655,10 @@ enum nl80211_commands { * switching on a different channel during CAC detection on the selected * radar channel. * + * @NL80211_ATTR_AP_SETTINGS_FLAGS: u32 attribute contains ap settings flags, + * enumerated in &enum nl80211_ap_settings_flags. This attribute shall be + * used with %NL80211_CMD_START_AP request. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3161,6 +3167,8 @@ enum nl80211_attrs { NL80211_ATTR_RADAR_BACKGROUND, + NL80211_ATTR_AP_SETTINGS_FLAGS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -7481,4 +7489,14 @@ enum nl80211_mbssid_config_attributes { NL80211_MBSSID_CONFIG_ATTR_MAX = __NL80211_MBSSID_CONFIG_ATTR_LAST - 1, }; +/** + * enum nl80211_ap_settings_flags - AP settings flags + * + * @NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external + * authentication. + */ +enum nl80211_ap_settings_flags { + NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = 1 << 0, +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 87c1aec15dee8bdb245aabbd181f9f9e1a4770ae Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Fri, 26 Nov 2021 12:55:19 +0530 Subject: nl80211: Add support to offload SA Query procedures for AP SME device Add a flag attribute to use in ap settings to indicate userspace supports offloading of SA Query procedures to driver. Also add AP SME device feature flag to advertise that the SA Query procedures offloaded to driver when userspace indicates support for offloading of SA Query procedures. Driver handles SA Query procedures in driver's SME it self and skip sending SA Query request or response frames to userspace when userspace indicates support for SA Query procedures offload. But if userspace doesn't advertise support for SA Query procedures offload driver shall not offload SA Query procedures handling. Also userspace with SA Query procedures offload capability shall skip SA Query specific validations when driver indicates support for handling SA Query procedures. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1637911519-21306-2-git-send-email-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ab461b2be157..d997252de986 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5760,13 +5760,15 @@ enum nl80211_tdls_operation { NL80211_TDLS_DISABLE_LINK, }; -/* +/** * enum nl80211_ap_sme_features - device-integrated AP features - * Reserved for future use, no bits are defined in - * NL80211_ATTR_DEVICE_AP_SME yet. + * @NL80211_AP_SME_SA_QUERY_OFFLOAD: SA Query procedures offloaded to driver + * when user space indicates support for SA Query procedures offload during + * "start ap" with %NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT. + */ enum nl80211_ap_sme_features { + NL80211_AP_SME_SA_QUERY_OFFLOAD = 1 << 0, }; - */ /** * enum nl80211_feature_flags - device/driver features @@ -7494,9 +7496,15 @@ enum nl80211_mbssid_config_attributes { * * @NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external * authentication. + * @NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT: Userspace supports SA Query + * procedures offload to driver. If driver advertises + * %NL80211_AP_SME_SA_QUERY_OFFLOAD in AP SME features, userspace shall + * ignore SA Query procedures and validations when this flag is set by + * userspace. */ enum nl80211_ap_settings_flags { NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = 1 << 0, + NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT = 1 << 1, }; #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From d9a8297e873eda9d47aa5895ca47dc12eca0b198 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Dec 2021 11:09:25 +0100 Subject: nl82011: clarify interface combinations wrt. channels Thinking about MLD (Draft 802.11be) now, it's clear that we'll have new limitations where different stations (or links) must be on _different_ channels (or in different frequency ranges even.) Clarify that the current limit of multiple channels is a maximum and the same one is OK. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20211201110924.2816d91b6862.I2d997abd525574529f88e941d90aeb640dbb1abf@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d997252de986..f1a9d6594df2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5578,7 +5578,7 @@ enum nl80211_iface_limit_attrs { * => allows 8 of AP/GO that can have BI gcd >= min gcd * * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 - * => allows two STAs on different channels + * => allows two STAs on the same or on different channels * * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 * => allows a STA plus three P2P interfaces -- cgit v1.2.3 From 5bc9a9dd75351023793d8aa4116ead005d659729 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 19 Dec 2021 21:51:24 +0200 Subject: rfkill: allow to get the software rfkill state iwlwifi needs to be able to differentiate between the software rfkill state and the hardware rfkill state. The reason for this is that iwlwifi needs to notify any change in the software rfkill state even when it doesn't own the device (which means even when the hardware rfkill is asserted). In order to be able to know the software rfkill when the host does not own the device, iwlwifi needs to be able to ask the state of the software rfkill ignoring the state of the hardware rfkill. Signed-off-by: Emmanuel Grumbach Link: https://lore.kernel.org/r/20211219195124.125689-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- include/linux/rfkill.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 231e06b74b50..c35f3962dc4f 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -229,6 +229,13 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw); */ bool rfkill_blocked(struct rfkill *rfkill); +/** + * rfkill_soft_blocked - Query soft rfkill block state + * + * @rfkill: rfkill struct to query + */ +bool rfkill_soft_blocked(struct rfkill *rfkill); + /** * rfkill_find_type - Helper for finding rfkill type by name * @name: the name of the type -- cgit v1.2.3 From e047d0372689f5d4231eefb731b60ac64720bbf0 Mon Sep 17 00:00:00 2001 From: Ricard Wanderlof Date: Wed, 15 Dec 2021 18:01:24 +0100 Subject: ASoC: tlv320adc3xxx: New codec bindings DT bindings for Texas Instruments TLV320ADC3001 and TLV320ADC3101 audio ADCs. Signed-off-by: Ricard Wanderlof Link: https://lore.kernel.org/r/alpine.DEB.2.21.2112151759170.27889@lap5cg0092dnk.se.axis.com Signed-off-by: Mark Brown --- include/dt-bindings/sound/tlv320adc3xxx.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/dt-bindings/sound/tlv320adc3xxx.h (limited to 'include') diff --git a/include/dt-bindings/sound/tlv320adc3xxx.h b/include/dt-bindings/sound/tlv320adc3xxx.h new file mode 100644 index 000000000000..ec988439da20 --- /dev/null +++ b/include/dt-bindings/sound/tlv320adc3xxx.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Devicetree bindings definitions for tlv320adc3xxx driver. + * + * Copyright (C) 2021 Axis Communications AB + */ +#ifndef __DT_TLV320ADC3XXX_H +#define __DT_TLV320ADC3XXX_H + +#define ADC3XXX_GPIO_DISABLED 0 /* I/O buffers powered down */ +#define ADC3XXX_GPIO_INPUT 1 /* Various non-GPIO inputs */ +#define ADC3XXX_GPIO_GPI 2 /* General purpose input */ +#define ADC3XXX_GPIO_GPO 3 /* General purpose output */ +#define ADC3XXX_GPIO_CLKOUT 4 /* Source set in reg. CLKOUT_MUX */ +#define ADC3XXX_GPIO_INT1 5 /* INT1 output */ +#define ADC3XXX_GPIO_INT2 6 /* INT2 output */ +/* value 7 is reserved */ +#define ADC3XXX_GPIO_SECONDARY_BCLK 8 /* Codec interface secondary BCLK */ +#define ADC3XXX_GPIO_SECONDARY_WCLK 9 /* Codec interface secondary WCLK */ +#define ADC3XXX_GPIO_ADC_MOD_CLK 10 /* Clock output for digital mics */ +/* values 11-15 reserved */ + +#define ADC3XXX_MICBIAS_OFF 0 /* Micbias pin powered off */ +#define ADC3XXX_MICBIAS_2_0V 1 /* Micbias pin set to 2.0V */ +#define ADC3XXX_MICBIAS_2_5V 2 /* Micbias pin set to 2.5V */ +#define ADC3XXX_MICBIAS_AVDD 3 /* Use AVDD voltage for micbias pin */ + +#endif /* __DT_TLV320ADC3XXX_H */ -- cgit v1.2.3 From aade40b62745cf0b4e8a17d43652c5faff354e6b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 20 Dec 2021 13:34:48 +0100 Subject: iommu/iova: Temporarily include dma-mapping.h from iova.h Some users of iova.h still expect that dma-mapping.h is also included. Re-add the include until these users are updated to fix compile failures in the iommu tree. Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20211220123448.19996-1-joro@8bytes.org Signed-off-by: Joerg Roedel --- include/linux/iova.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 0abd48c5e622..cea79cb9f26c 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -12,6 +12,7 @@ #include #include #include +#include /* iova structure */ struct iova { -- cgit v1.2.3 From e82513696eadcf71048be174b4b71f9903ba4afd Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Mon, 13 Dec 2021 18:40:33 +0900 Subject: dt-bindings: gpio: msc313: Add offsets for ssd20xd Add the gpio offsets for the SSD201 and SSD202D chips. Signed-off-by: Daniel Palmer Acked-by: Rob Herring Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- include/dt-bindings/gpio/msc313-gpio.h | 71 ++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/gpio/msc313-gpio.h b/include/dt-bindings/gpio/msc313-gpio.h index 2dd56683d3c1..5458c6580a02 100644 --- a/include/dt-bindings/gpio/msc313-gpio.h +++ b/include/dt-bindings/gpio/msc313-gpio.h @@ -50,4 +50,75 @@ #define MSC313_GPIO_SPI0_DI (MSC313_GPIO_SPI0 + 2) #define MSC313_GPIO_SPI0_DO (MSC313_GPIO_SPI0 + 3) +/* SSD20x */ +#define SSD20XD_GPIO_FUART 0 +#define SSD20XD_GPIO_FUART_RX (SSD20XD_GPIO_FUART + 0) +#define SSD20XD_GPIO_FUART_TX (SSD20XD_GPIO_FUART + 1) +#define SSD20XD_GPIO_FUART_CTS (SSD20XD_GPIO_FUART + 2) +#define SSD20XD_GPIO_FUART_RTS (SSD20XD_GPIO_FUART + 3) + +#define SSD20XD_GPIO_SD (SSD20XD_GPIO_FUART_RTS + 1) +#define SSD20XD_GPIO_SD_CLK (SSD20XD_GPIO_SD + 0) +#define SSD20XD_GPIO_SD_CMD (SSD20XD_GPIO_SD + 1) +#define SSD20XD_GPIO_SD_D0 (SSD20XD_GPIO_SD + 2) +#define SSD20XD_GPIO_SD_D1 (SSD20XD_GPIO_SD + 3) +#define SSD20XD_GPIO_SD_D2 (SSD20XD_GPIO_SD + 4) +#define SSD20XD_GPIO_SD_D3 (SSD20XD_GPIO_SD + 5) + +#define SSD20XD_GPIO_UART0 (SSD20XD_GPIO_SD_D3 + 1) +#define SSD20XD_GPIO_UART0_RX (SSD20XD_GPIO_UART0 + 0) +#define SSD20XD_GPIO_UART0_TX (SSD20XD_GPIO_UART0 + 1) + +#define SSD20XD_GPIO_UART1 (SSD20XD_GPIO_UART0_TX + 1) +#define SSD20XD_GPIO_UART1_RX (SSD20XD_GPIO_UART1 + 0) +#define SSD20XD_GPIO_UART1_TX (SSD20XD_GPIO_UART1 + 1) + +#define SSD20XD_GPIO_TTL (SSD20XD_GPIO_UART1_TX + 1) +#define SSD20XD_GPIO_TTL0 (SSD20XD_GPIO_TTL + 0) +#define SSD20XD_GPIO_TTL1 (SSD20XD_GPIO_TTL + 1) +#define SSD20XD_GPIO_TTL2 (SSD20XD_GPIO_TTL + 2) +#define SSD20XD_GPIO_TTL3 (SSD20XD_GPIO_TTL + 3) +#define SSD20XD_GPIO_TTL4 (SSD20XD_GPIO_TTL + 4) +#define SSD20XD_GPIO_TTL5 (SSD20XD_GPIO_TTL + 5) +#define SSD20XD_GPIO_TTL6 (SSD20XD_GPIO_TTL + 6) +#define SSD20XD_GPIO_TTL7 (SSD20XD_GPIO_TTL + 7) +#define SSD20XD_GPIO_TTL8 (SSD20XD_GPIO_TTL + 8) +#define SSD20XD_GPIO_TTL9 (SSD20XD_GPIO_TTL + 9) +#define SSD20XD_GPIO_TTL10 (SSD20XD_GPIO_TTL + 10) +#define SSD20XD_GPIO_TTL11 (SSD20XD_GPIO_TTL + 11) +#define SSD20XD_GPIO_TTL12 (SSD20XD_GPIO_TTL + 12) +#define SSD20XD_GPIO_TTL13 (SSD20XD_GPIO_TTL + 13) +#define SSD20XD_GPIO_TTL14 (SSD20XD_GPIO_TTL + 14) +#define SSD20XD_GPIO_TTL15 (SSD20XD_GPIO_TTL + 15) +#define SSD20XD_GPIO_TTL16 (SSD20XD_GPIO_TTL + 16) +#define SSD20XD_GPIO_TTL17 (SSD20XD_GPIO_TTL + 17) +#define SSD20XD_GPIO_TTL18 (SSD20XD_GPIO_TTL + 18) +#define SSD20XD_GPIO_TTL19 (SSD20XD_GPIO_TTL + 19) +#define SSD20XD_GPIO_TTL20 (SSD20XD_GPIO_TTL + 20) +#define SSD20XD_GPIO_TTL21 (SSD20XD_GPIO_TTL + 21) +#define SSD20XD_GPIO_TTL22 (SSD20XD_GPIO_TTL + 22) +#define SSD20XD_GPIO_TTL23 (SSD20XD_GPIO_TTL + 23) +#define SSD20XD_GPIO_TTL24 (SSD20XD_GPIO_TTL + 24) +#define SSD20XD_GPIO_TTL25 (SSD20XD_GPIO_TTL + 25) +#define SSD20XD_GPIO_TTL26 (SSD20XD_GPIO_TTL + 26) +#define SSD20XD_GPIO_TTL27 (SSD20XD_GPIO_TTL + 27) + +#define SSD20XD_GPIO_GPIO (SSD20XD_GPIO_TTL27 + 1) +#define SSD20XD_GPIO_GPIO0 (SSD20XD_GPIO_GPIO + 0) +#define SSD20XD_GPIO_GPIO1 (SSD20XD_GPIO_GPIO + 1) +#define SSD20XD_GPIO_GPIO2 (SSD20XD_GPIO_GPIO + 2) +#define SSD20XD_GPIO_GPIO3 (SSD20XD_GPIO_GPIO + 3) +#define SSD20XD_GPIO_GPIO4 (SSD20XD_GPIO_GPIO + 4) +#define SSD20XD_GPIO_GPIO5 (SSD20XD_GPIO_GPIO + 5) +#define SSD20XD_GPIO_GPIO6 (SSD20XD_GPIO_GPIO + 6) +#define SSD20XD_GPIO_GPIO7 (SSD20XD_GPIO_GPIO + 7) +#define SSD20XD_GPIO_GPIO10 (SSD20XD_GPIO_GPIO + 8) +#define SSD20XD_GPIO_GPIO11 (SSD20XD_GPIO_GPIO + 9) +#define SSD20XD_GPIO_GPIO12 (SSD20XD_GPIO_GPIO + 10) +#define SSD20XD_GPIO_GPIO13 (SSD20XD_GPIO_GPIO + 11) +#define SSD20XD_GPIO_GPIO14 (SSD20XD_GPIO_GPIO + 12) +#define SSD20XD_GPIO_GPIO85 (SSD20XD_GPIO_GPIO + 13) +#define SSD20XD_GPIO_GPIO86 (SSD20XD_GPIO_GPIO + 14) +#define SSD20XD_GPIO_GPIO90 (SSD20XD_GPIO_GPIO + 15) + #endif /* _DT_BINDINGS_MSC313_GPIO_H */ -- cgit v1.2.3 From 59f37b7370ef56e6faf25d0e18bc597a0af40bb8 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Sat, 4 Dec 2021 21:57:55 +0200 Subject: tty: serial: samsung: Remove USI initialization USI control is now extracted to the dedicated USI driver. Remove USI related code from serial driver to avoid conflicts and code duplication. Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211204195757.8600-4-semen.protsenko@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_s3c.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index cf0de4a86640..f6c3323fc4c5 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -27,15 +27,6 @@ #define S3C2410_UERSTAT (0x14) #define S3C2410_UFSTAT (0x18) #define S3C2410_UMSTAT (0x1C) -#define USI_CON (0xC4) -#define USI_OPTION (0xC8) - -#define USI_CON_RESET (1<<0) -#define USI_CON_RESET_MASK (1<<0) - -#define USI_OPTION_HWACG_CLKREQ_ON (1<<1) -#define USI_OPTION_HWACG_CLKSTOP_ON (1<<2) -#define USI_OPTION_HWACG_MASK (3<<1) #define S3C2410_LCON_CFGMASK ((0xF<<3)|(0x3)) -- cgit v1.2.3 From 1a5e91d8375fc8369207cc0b9894a324f2bbf1d9 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Mon, 13 Dec 2021 02:14:02 -0500 Subject: swiotlb: Add swiotlb bounce buffer remap function for HV IVM In Isolation VM with AMD SEV, bounce buffer needs to be accessed via extra address space which is above shared_gpa_boundary (E.G 39 bit address line) reported by Hyper-V CPUID ISOLATION_CONFIG. The access physical address will be original physical address + shared_gpa_boundary. The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of memory(vTOM). Memory addresses below vTOM are automatically treated as private while memory above vTOM is treated as shared. Expose swiotlb_unencrypted_base for platforms to set unencrypted memory base offset and platform calls swiotlb_update_mem_attributes() to remap swiotlb mem to unencrypted address space. memremap() can not be called in the early stage and so put remapping code into swiotlb_update_mem_attributes(). Store remap address and use it to copy data from/to swiotlb bounce buffer. Signed-off-by: Tianyu Lan Acked-by: Christoph Hellwig Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20211213071407.314309-2-ltykernel@gmail.com Signed-off-by: Wei Liu --- include/linux/swiotlb.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 569272871375..f6c3638255d5 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force; * @end: The end address of the swiotlb memory pool. Used to do a quick * range check to see if the memory was in fact allocated by this * API. + * @vaddr: The vaddr of the swiotlb memory pool. The swiotlb memory pool + * may be remapped in the memory encrypted case and store virtual + * address for bounce buffer operation. * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and * @end. For default swiotlb, this is command line adjustable via * setup_io_tlb_npages. @@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force; struct io_tlb_mem { phys_addr_t start; phys_addr_t end; + void *vaddr; unsigned long nslabs; unsigned long used; unsigned int index; @@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct device *dev) } #endif /* CONFIG_DMA_RESTRICTED_POOL */ +extern phys_addr_t swiotlb_unencrypted_base; + #endif /* __LINUX_SWIOTLB_H */ -- cgit v1.2.3 From 743b237c3a7b0f5b44aa704aae8a1058877b6322 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Mon, 13 Dec 2021 02:14:05 -0500 Subject: scsi: storvsc: Add Isolation VM support for storvsc driver In Isolation VM, all shared memory with host needs to mark visible to host via hvcall. vmbus_establish_gpadl() has already done it for storvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_ mpb_desc() still needs to be handled. Use DMA API(scsi_dma_map/unmap) to map these memory during sending/receiving packet and return swiotlb bounce buffer dma address. In Isolation VM, swiotlb bounce buffer is marked to be visible to host and the swiotlb force mode is enabled. Set device's dma min align mask to HV_HYP_PAGE_SIZE - 1 in order to keep the original data offset in the bounce buffer. Signed-off-by: Tianyu Lan Reviewed-by: Long Li Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20211213071407.314309-5-ltykernel@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b823311eac79..650a0574b746 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1261,6 +1261,7 @@ struct hv_device { struct vmbus_channel *channel; struct kset *channels_kset; + struct device_dma_parameters dma_parms; /* place holder to keep track of the dir for hv device in debugfs */ struct dentry *debug_dir; -- cgit v1.2.3 From 846da38de0e8224f2f94b885125cf1fd2d7b0d39 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Mon, 13 Dec 2021 02:14:06 -0500 Subject: net: netvsc: Add Isolation VM support for netvsc driver In Isolation VM, all shared memory with host needs to mark visible to host via hvcall. vmbus_establish_gpadl() has already done it for netvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_ pagebuffer() stills need to be handled. Use DMA API to map/umap these memory during sending/receiving packet and Hyper-V swiotlb bounce buffer dma address will be returned. The swiotlb bounce buffer has been masked to be visible to host during boot up. rx/tx ring buffer is allocated via vzalloc() and they need to be mapped into unencrypted address space(above vTOM) before sharing with host and accessing. Add hv_map/unmap_memory() to map/umap rx /tx ring buffer. Signed-off-by: Tianyu Lan Reviewed-by: Haiyang Zhang Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20211213071407.314309-6-ltykernel@gmail.com Signed-off-by: Wei Liu --- include/asm-generic/mshyperv.h | 2 ++ include/linux/hyperv.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 3e2248ac328e..94e73ba129c5 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -269,6 +269,8 @@ bool hv_isolation_type_snp(void); u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); +void *hv_map_memory(void *addr, unsigned long size); +void hv_unmap_memory(void *addr); #else /* CONFIG_HYPERV */ static inline bool hv_is_hyperv_initialized(void) { return false; } static inline bool hv_is_hibernation_supported(void) { return false; } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 650a0574b746..f565a8938836 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1584,6 +1584,11 @@ struct hyperv_service_callback { void (*callback)(void *context); }; +struct hv_dma_range { + dma_addr_t dma; + u32 mapping_size; +}; + #define MAX_SRV_VER 0x7ffffff extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen, const int *fw_version, int fw_vercnt, -- cgit v1.2.3 From ed98ea2128b6fd83bce13716edf8f5fe6c47f574 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 17 Dec 2021 09:01:52 +0800 Subject: audit: replace zero-length array with flexible-array member Zero-length arrays are deprecated and should be replaced with flexible-array members. Link: https://github.com/KSPP/linux/issues/78 Signed-off-by: Xiu Jianfeng Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 9176a095fefc..8eda133ca4c1 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -514,7 +514,7 @@ struct audit_rule_data { __u32 values[AUDIT_MAX_FIELDS]; __u32 fieldflags[AUDIT_MAX_FIELDS]; __u32 buflen; /* total length of string fields */ - char buf[0]; /* string fields buffer */ + char buf[]; /* string fields buffer */ }; #endif /* _UAPI_LINUX_AUDIT_H_ */ -- cgit v1.2.3 From 6fc61c39ee1adb5f4115d288c876772fcd8b6979 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sun, 21 Nov 2021 01:20:46 +0100 Subject: soc: qcom: llcc: Add configuration data for SM8350 Add LLCC configuration data for SM8350 SoC. Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211121002050.36977-2-konrad.dybcio@somainline.org --- include/linux/soc/qcom/llcc-qcom.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 437c9df13229..9e8fd92c96b7 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -33,6 +33,9 @@ #define LLCC_MODPE 29 #define LLCC_APTCM 30 #define LLCC_WRCACHE 31 +#define LLCC_CVPFW 32 +#define LLCC_CPUSS1 33 +#define LLCC_CPUHWT 36 /** * struct llcc_slice_desc - Cache slice descriptor -- cgit v1.2.3 From 8712107740ad272bd89c873ec850146ac3d8832a Mon Sep 17 00:00:00 2001 From: Martin Botka Date: Tue, 30 Nov 2021 22:23:29 +0100 Subject: dt-bindings: qcom-rpmpd: Add sm6125 power domains Add dt-bindings for sm6125 SoC RPM Power Domains Signed-off-by: Martin Botka [bjorn: Added compatible to binding as well] Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211130212332.25401-1-martin.botka@somainline.org --- include/dt-bindings/power/qcom-rpmpd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index 340b0ffe5eb8..5b64059ef43e 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -219,6 +219,14 @@ #define SM6115_VDD_LPI_CX 6 #define SM6115_VDD_LPI_MX 7 +/* SM6125 Power Domains */ +#define SM6125_VDDCX 0 +#define SM6125_VDDCX_AO 1 +#define SM6125_VDDCX_VFL 2 +#define SM6125_VDDMX 3 +#define SM6125_VDDMX_AO 4 +#define SM6125_VDDMX_VFL 5 + /* QCM2290 Power Domains */ #define QCM2290_VDDCX 0 #define QCM2290_VDDCX_AO 1 -- cgit v1.2.3 From 22c755708c2395c58eb7670357baa110ba1ca2e0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 1 Dec 2021 12:57:43 +0530 Subject: dt-bindings: power: rpmpd: Add SM8450 to rpmpd binding Add compatible and constants for the power domains exposed by the RPMH in the Qualcomm SM8450 platform. Signed-off-by: Dmitry Baryshkov Signed-off-by: Vinod Koul Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211201072745.3969077-6-vkoul@kernel.org --- include/dt-bindings/power/qcom-rpmpd.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index 5b64059ef43e..edfc1ff2acb3 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -68,6 +68,21 @@ #define SM8350_MXC_AO 11 #define SM8350_MSS 12 +/* SM8450 Power Domain Indexes */ +#define SM8450_CX 0 +#define SM8450_CX_AO 1 +#define SM8450_EBI 2 +#define SM8450_GFX 3 +#define SM8450_LCX 4 +#define SM8450_LMX 5 +#define SM8450_MMCX 6 +#define SM8450_MMCX_AO 7 +#define SM8450_MX 8 +#define SM8450_MX_AO 9 +#define SM8450_MXC 10 +#define SM8450_MXC_AO 11 +#define SM8450_MSS 12 + /* SC7180 Power Domain Indexes */ #define SC7180_CX 0 #define SC7180_CX_AO 1 -- cgit v1.2.3 From dbcefdeb2a58039f4c81d0361056fbdd9be906a1 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Mon, 20 Dec 2021 10:31:04 +0800 Subject: mctp: emit RTM_NEWADDR and RTM_DELADDR Userspace can receive notification of MCTP address changes via RTNLGRP_MCTP_IFADDR rtnetlink multicast group. Signed-off-by: Matt Johnston Link: https://lore.kernel.org/r/20211220023104.1965509-1-matt@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- include/uapi/linux/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5888492a5257..93d934cc4613 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -754,6 +754,8 @@ enum rtnetlink_groups { #define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP RTNLGRP_BRVLAN, #define RTNLGRP_BRVLAN RTNLGRP_BRVLAN + RTNLGRP_MCTP_IFADDR, +#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) -- cgit v1.2.3 From 8f905c0e7354ef261360fb7535ea079b1082c105 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Dec 2021 06:33:30 -0800 Subject: inet: fully convert sk->sk_rx_dst to RCU rules syzbot reported various issues around early demux, one being included in this changelog [1] sk->sk_rx_dst is using RCU protection without clearly documenting it. And following sequences in tcp_v4_do_rcv()/tcp_v6_do_rcv() are not following standard RCU rules. [a] dst_release(dst); [b] sk->sk_rx_dst = NULL; They look wrong because a delete operation of RCU protected pointer is supposed to clear the pointer before the call_rcu()/synchronize_rcu() guarding actual memory freeing. In some cases indeed, dst could be freed before [b] is done. We could cheat by clearing sk_rx_dst before calling dst_release(), but this seems the right time to stick to standard RCU annotations and debugging facilities. [1] BUG: KASAN: use-after-free in dst_check include/net/dst.h:470 [inline] BUG: KASAN: use-after-free in tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 Read of size 2 at addr ffff88807f1cb73a by task syz-executor.5/9204 CPU: 0 PID: 9204 Comm: syz-executor.5 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x320 mm/kasan/report.c:247 __kasan_report mm/kasan/report.c:433 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:450 dst_check include/net/dst.h:470 [inline] tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 ip_rcv_finish_core.constprop.0+0x15de/0x1e80 net/ipv4/ip_input.c:340 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:637 irq_exit_rcu+0x5/0x20 kernel/softirq.c:649 common_interrupt+0x52/0xc0 arch/x86/kernel/irq.c:240 asm_common_interrupt+0x1e/0x40 arch/x86/include/asm/idtentry.h:629 RIP: 0033:0x7f5e972bfd57 Code: 39 d1 73 14 0f 1f 80 00 00 00 00 48 8b 50 f8 48 83 e8 08 48 39 ca 77 f3 48 39 c3 73 3e 48 89 13 48 8b 50 f8 48 89 38 49 8b 0e <48> 8b 3e 48 83 c3 08 48 83 c6 08 eb bc 48 39 d1 72 9e 48 39 d0 73 RSP: 002b:00007fff8a413210 EFLAGS: 00000283 RAX: 00007f5e97108990 RBX: 00007f5e97108338 RCX: ffffffff81d3aa45 RDX: ffffffff81d3aa45 RSI: 00007f5e97108340 RDI: ffffffff81d3aa45 RBP: 00007f5e97107eb8 R08: 00007f5e97108d88 R09: 0000000093c2e8d9 R10: 0000000000000000 R11: 0000000000000000 R12: 00007f5e97107eb0 R13: 00007f5e97108338 R14: 00007f5e97107ea8 R15: 0000000000000019 Allocated by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] __kasan_slab_alloc+0x90/0xc0 mm/kasan/common.c:467 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x202/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 ip_route_input_slow+0x1817/0x3a20 net/ipv4/route.c:2340 ip_route_input_rcu net/ipv4/route.c:2470 [inline] ip_route_input_noref+0x116/0x2a0 net/ipv4/route.c:2415 ip_rcv_finish_core.constprop.0+0x288/0x1e80 net/ipv4/ip_input.c:354 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Freed by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xff/0x130 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] slab_free_hook mm/slub.c:1723 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1749 slab_free mm/slub.c:3513 [inline] kmem_cache_free+0xbd/0x5d0 mm/slub.c:3530 dst_destroy+0x2d6/0x3f0 net/core/dst.c:127 rcu_do_batch kernel/rcu/tree.c:2506 [inline] rcu_core+0x7ab/0x1470 kernel/rcu/tree.c:2741 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Last potentially related work creation: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 __kasan_record_aux_stack+0xf5/0x120 mm/kasan/generic.c:348 __call_rcu kernel/rcu/tree.c:2985 [inline] call_rcu+0xb1/0x740 kernel/rcu/tree.c:3065 dst_release net/core/dst.c:177 [inline] dst_release+0x79/0xe0 net/core/dst.c:167 tcp_v4_do_rcv+0x612/0x8d0 net/ipv4/tcp_ipv4.c:1712 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0x134/0x3b0 net/core/sock.c:2768 release_sock+0x54/0x1b0 net/core/sock.c:3300 tcp_sendmsg+0x36/0x40 net/ipv4/tcp.c:1441 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 sock_write_iter+0x289/0x3c0 net/socket.c:1057 call_write_iter include/linux/fs.h:2162 [inline] new_sync_write+0x429/0x660 fs/read_write.c:503 vfs_write+0x7cd/0xae0 fs/read_write.c:590 ksys_write+0x1ee/0x250 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88807f1cb700 which belongs to the cache ip_dst_cache of size 176 The buggy address is located 58 bytes inside of 176-byte region [ffff88807f1cb700, ffff88807f1cb7b0) The buggy address belongs to the page: page:ffffea0001fc72c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7f1cb flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 dead000000000100 dead000000000122 ffff8881413bb780 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_HARDWALL), pid 5, ts 108466983062, free_ts 108048976062 prep_new_page mm/page_alloc.c:2418 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4149 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5369 alloc_pages+0x1a7/0x300 mm/mempolicy.c:2191 alloc_slab_page mm/slub.c:1793 [inline] allocate_slab mm/slub.c:1930 [inline] new_slab+0x32d/0x4a0 mm/slub.c:1993 ___slab_alloc+0x918/0xfe0 mm/slub.c:3022 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3109 slab_alloc_node mm/slub.c:3200 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x35c/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 __mkroute_output net/ipv4/route.c:2564 [inline] ip_route_output_key_hash_rcu+0x921/0x2d00 net/ipv4/route.c:2791 ip_route_output_key_hash+0x18b/0x300 net/ipv4/route.c:2619 __ip_route_output_key include/net/route.h:126 [inline] ip_route_output_flow+0x23/0x150 net/ipv4/route.c:2850 ip_route_output_key include/net/route.h:142 [inline] geneve_get_v4_rt+0x3a6/0x830 drivers/net/geneve.c:809 geneve_xmit_skb drivers/net/geneve.c:899 [inline] geneve_xmit+0xc4a/0x3540 drivers/net/geneve.c:1082 __netdev_start_xmit include/linux/netdevice.h:4994 [inline] netdev_start_xmit include/linux/netdevice.h:5008 [inline] xmit_one net/core/dev.c:3590 [inline] dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3606 __dev_queue_xmit+0x299a/0x3650 net/core/dev.c:4229 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1338 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1389 free_unref_page_prepare mm/page_alloc.c:3309 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3388 qlink_free mm/kasan/quarantine.c:146 [inline] qlist_free_all+0x5a/0xc0 mm/kasan/quarantine.c:165 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:272 __kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:444 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] kmem_cache_alloc_node+0x255/0x3f0 mm/slub.c:3270 __alloc_skb+0x215/0x340 net/core/skbuff.c:414 alloc_skb include/linux/skbuff.h:1126 [inline] alloc_skb_with_frags+0x93/0x620 net/core/skbuff.c:6078 sock_alloc_send_pskb+0x783/0x910 net/core/sock.c:2575 mld_newpack+0x1df/0x770 net/ipv6/mcast.c:1754 add_grhead+0x265/0x330 net/ipv6/mcast.c:1857 add_grec+0x1053/0x14e0 net/ipv6/mcast.c:1995 mld_send_initial_cr.part.0+0xf6/0x230 net/ipv6/mcast.c:2242 mld_send_initial_cr net/ipv6/mcast.c:1232 [inline] mld_dad_work+0x1d3/0x690 net/ipv6/mcast.c:2268 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445 Memory state around the buggy address: ffff88807f1cb600: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807f1cb680: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff88807f1cb700: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807f1cb780: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc ffff88807f1cb800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20211220143330.680945-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index bea21ff70e74..d47e9658da28 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -431,7 +431,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry *sk_rx_dst; + struct dst_entry __rcu *sk_rx_dst; int sk_rx_dst_ifindex; u32 sk_rx_dst_cookie; -- cgit v1.2.3 From 7e5cced9ca84df52d874aca6b632f930b3dc5bc6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 Dec 2021 09:49:01 -0500 Subject: net: accept UFOv6 packages in virtio_net_hdr_to_skb Skb with skb->protocol 0 at the time of virtio_net_hdr_to_skb may have a protocol inferred from virtio_net_hdr with virtio_net_hdr_set_proto. Unlike TCP, UDP does not have separate types for IPv4 and IPv6. Type VIRTIO_NET_HDR_GSO_UDP is guessed to be IPv4/UDP. As of the below commit, UFOv6 packets are dropped due to not matching the protocol as obtained from dev_parse_header_protocol. Invert the test to take that L2 protocol field as starting point and pass both UFOv4 and UFOv6 for VIRTIO_NET_HDR_GSO_UDP. Fixes: 924a9bc362a5 ("net: check if protocol extracted by virtio_net_hdr_set_proto is correct") Link: https://lore.kernel.org/netdev/CABcq3pG9GRCYqFDBAJ48H1vpnnX=41u+MhQnayF1ztLH4WX0Fw@mail.gmail.com/ Reported-by: Andrew Melnichenko Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211220144901.2784030-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 04e87f4b9417..22dd48c82560 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -7,6 +7,21 @@ #include #include +static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) +{ + switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + return protocol == cpu_to_be16(ETH_P_IP); + case VIRTIO_NET_HDR_GSO_TCPV6: + return protocol == cpu_to_be16(ETH_P_IPV6); + case VIRTIO_NET_HDR_GSO_UDP: + return protocol == cpu_to_be16(ETH_P_IP) || + protocol == cpu_to_be16(ETH_P_IPV6); + default: + return false; + } +} + static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { @@ -88,9 +103,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); - virtio_net_hdr_set_proto(skb, hdr); - if (protocol && protocol != skb->protocol) + if (!protocol) + virtio_net_hdr_set_proto(skb, hdr); + else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) return -EINVAL; + else + skb->protocol = protocol; } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, -- cgit v1.2.3 From 1ed1d592113959f00cc552c3b9f47ca2d157768f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 Dec 2021 09:50:27 -0500 Subject: net: skip virtio_net_hdr_set_proto if protocol already set virtio_net_hdr_set_proto infers skb->protocol from the virtio_net_hdr gso_type, to avoid packets getting dropped for lack of a proto type. Its protocol choice is a guess, especially in the case of UFO, where the single VIRTIO_NET_HDR_GSO_UDP label covers both UFOv4 and UFOv6. Skip this best effort if the field is already initialized. Whether explicitly from userspace, or implicitly based on an earlier call to dev_parse_header_protocol (which is more robust, but was introduced after this patch). Fixes: 9d2f67e43b73 ("net/packet: fix packet drop as of virtio gso") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211220145027.2784293-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 22dd48c82560..a960de68ac69 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -25,6 +25,9 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { + if (skb->protocol) + return 0; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: -- cgit v1.2.3 From 80a5ca99c5c04be6777df225ab932142a9d60c3f Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 16 Dec 2021 11:33:00 +0800 Subject: rapidio: remove not used macro definition in rio_ids.h The definition of RIO_VID_FREESCALE, RIO_DID_MPC8560, RIO_DID_TSI500, RIO_DID_TSI576 and RIO_DID_TSI721 are not used for many years in the current code, so just remove them. Signed-off-by: Tiezhu Yang Link: https://lore.kernel.org/r/1639625581-22867-2-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Greg Kroah-Hartman --- include/linux/rio_ids.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index 4846f72759b2..e74d8840708a 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -9,15 +9,10 @@ #ifndef LINUX_RIO_IDS_H #define LINUX_RIO_IDS_H -#define RIO_VID_FREESCALE 0x0002 -#define RIO_DID_MPC8560 0x0003 - #define RIO_VID_TUNDRA 0x000d -#define RIO_DID_TSI500 0x0500 #define RIO_DID_TSI568 0x0568 #define RIO_DID_TSI572 0x0572 #define RIO_DID_TSI574 0x0574 -#define RIO_DID_TSI576 0x0578 /* Same ID as Tsi578 */ #define RIO_DID_TSI577 0x0577 #define RIO_DID_TSI578 0x0578 @@ -33,7 +28,6 @@ #define RIO_DID_IDTCPS1616 0x0379 #define RIO_DID_IDTVPS1616 0x0377 #define RIO_DID_IDTSPS1616 0x0378 -#define RIO_DID_TSI721 0x80ab #define RIO_DID_IDTRXS1632 0x80e5 #define RIO_DID_IDTRXS2448 0x80e6 -- cgit v1.2.3 From 612d4904191ff9aca01b1e087d8687b3a223cb33 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 16 Dec 2021 11:33:01 +0800 Subject: rapidio: remove not used code about RIO_VID_TUNDRA According to https://rapidio.org/vendor-id/, there is no 0x000d vendor id in the complete and current list of VendorIDs, it means that the related code is dead code now, so just remove them. Signed-off-by: Tiezhu Yang Link: https://lore.kernel.org/r/1639625581-22867-3-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Greg Kroah-Hartman --- include/linux/rio_ids.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index e74d8840708a..c7e2f21dd5c1 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -9,13 +9,6 @@ #ifndef LINUX_RIO_IDS_H #define LINUX_RIO_IDS_H -#define RIO_VID_TUNDRA 0x000d -#define RIO_DID_TSI568 0x0568 -#define RIO_DID_TSI572 0x0572 -#define RIO_DID_TSI574 0x0574 -#define RIO_DID_TSI577 0x0577 -#define RIO_DID_TSI578 0x0578 - #define RIO_VID_IDT 0x0038 #define RIO_DID_IDT70K200 0x0310 #define RIO_DID_IDTCPS8 0x035c -- cgit v1.2.3 From 0032ca576a79946492194ae4860b462d32815c66 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 21 Dec 2021 17:16:46 +0900 Subject: counter: Add the necessary colons and indents to the comments of counter_compi Since commit aaec1a0f76ec ("counter: Internalize sysfs interface code") introduce a warning as: linux-next/Documentation/driver-api/generic-counter:234: ./include/linux/counter.h:43: WARNING: Unexpected indentation. linux-next/Documentation/driver-api/generic-counter:234: ./include/linux/counter.h:45: WARNING: Block quote ends without a blank line; unexpected unindent. Add the necessary colons and indents. Fixes: aaec1a0f76ec ("counter: Internalize sysfs interface code") Signed-off-by: Yanteng Si Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/26011e814d6eca02c7ebdbb92f171a49928a7e89.1640072891.git.vilhelm.gray@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/counter.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/counter.h b/include/linux/counter.h index b7d0a00a61cf..dfbde2808998 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -38,64 +38,64 @@ enum counter_comp_type { * @type: Counter component data type * @name: device-specific component name * @priv: component-relevant data - * @action_read Synapse action mode read callback. The read value of the + * @action_read: Synapse action mode read callback. The read value of the * respective Synapse action mode should be passed back via * the action parameter. - * @device_u8_read Device u8 component read callback. The read value of the + * @device_u8_read: Device u8 component read callback. The read value of the * respective Device u8 component should be passed back via * the val parameter. - * @count_u8_read Count u8 component read callback. The read value of the + * @count_u8_read: Count u8 component read callback. The read value of the * respective Count u8 component should be passed back via * the val parameter. - * @signal_u8_read Signal u8 component read callback. The read value of the + * @signal_u8_read: Signal u8 component read callback. The read value of the * respective Signal u8 component should be passed back via * the val parameter. - * @device_u32_read Device u32 component read callback. The read value of + * @device_u32_read: Device u32 component read callback. The read value of * the respective Device u32 component should be passed * back via the val parameter. - * @count_u32_read Count u32 component read callback. The read value of the + * @count_u32_read: Count u32 component read callback. The read value of the * respective Count u32 component should be passed back via * the val parameter. - * @signal_u32_read Signal u32 component read callback. The read value of + * @signal_u32_read: Signal u32 component read callback. The read value of * the respective Signal u32 component should be passed * back via the val parameter. - * @device_u64_read Device u64 component read callback. The read value of + * @device_u64_read: Device u64 component read callback. The read value of * the respective Device u64 component should be passed * back via the val parameter. - * @count_u64_read Count u64 component read callback. The read value of the + * @count_u64_read: Count u64 component read callback. The read value of the * respective Count u64 component should be passed back via * the val parameter. - * @signal_u64_read Signal u64 component read callback. The read value of + * @signal_u64_read: Signal u64 component read callback. The read value of * the respective Signal u64 component should be passed * back via the val parameter. - * @action_write Synapse action mode write callback. The write value of + * @action_write: Synapse action mode write callback. The write value of * the respective Synapse action mode is passed via the * action parameter. - * @device_u8_write Device u8 component write callback. The write value of + * @device_u8_write: Device u8 component write callback. The write value of * the respective Device u8 component is passed via the val * parameter. - * @count_u8_write Count u8 component write callback. The write value of + * @count_u8_write: Count u8 component write callback. The write value of * the respective Count u8 component is passed via the val * parameter. - * @signal_u8_write Signal u8 component write callback. The write value of + * @signal_u8_write: Signal u8 component write callback. The write value of * the respective Signal u8 component is passed via the val * parameter. - * @device_u32_write Device u32 component write callback. The write value of + * @device_u32_write: Device u32 component write callback. The write value of * the respective Device u32 component is passed via the * val parameter. - * @count_u32_write Count u32 component write callback. The write value of + * @count_u32_write: Count u32 component write callback. The write value of * the respective Count u32 component is passed via the val * parameter. - * @signal_u32_write Signal u32 component write callback. The write value of + * @signal_u32_write: Signal u32 component write callback. The write value of * the respective Signal u32 component is passed via the * val parameter. - * @device_u64_write Device u64 component write callback. The write value of + * @device_u64_write: Device u64 component write callback. The write value of * the respective Device u64 component is passed via the * val parameter. - * @count_u64_write Count u64 component write callback. The write value of + * @count_u64_write: Count u64 component write callback. The write value of * the respective Count u64 component is passed via the val * parameter. - * @signal_u64_write Signal u64 component write callback. The write value of + * @signal_u64_write: Signal u64 component write callback. The write value of * the respective Signal u64 component is passed via the * val parameter. */ -- cgit v1.2.3 From 79f1c7304295bbbc611bc53cfd5425b777b3e840 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 9 Dec 2021 14:30:08 +0200 Subject: kernfs: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Acked-by: Tejun Heo Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211209123008.3391-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 9f650986a81b..861c4f0f8a29 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -6,7 +6,6 @@ #ifndef __LINUX_KERNFS_H #define __LINUX_KERNFS_H -#include #include #include #include @@ -14,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -23,6 +24,7 @@ struct dentry; struct iattr; struct seq_file; struct vm_area_struct; +struct vm_operations_struct; struct super_block; struct file_system_type; struct poll_table_struct; -- cgit v1.2.3 From eca6e2d4a4a4b824f055eeaaa24f1c2327fb91a2 Mon Sep 17 00:00:00 2001 From: Anand Ashok Dumbre Date: Fri, 3 Dec 2021 21:23:54 +0000 Subject: device property: Add fwnode_iomap() This patch introduces a new helper routine - fwnode_iomap(), which allows to map the memory mapped IO for a given device node. This implementation does not cover the ACPI case and may be expanded in the future. The main purpose here is to be able to develop resource provider agnostic drivers. Suggested-by: Andy Shevchenko Signed-off-by: Anand Ashok Dumbre Reviewed-by: Andy Shevchenko Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20211203212358.31444-2-anand.ashok.dumbre@xilinx.com Signed-off-by: Jonathan Cameron --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 88fa726a76df..6670d5a1ec2a 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -122,6 +122,8 @@ void fwnode_handle_put(struct fwnode_handle *fwnode); int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index); +void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index); + unsigned int device_get_child_node_count(struct device *dev); static inline bool device_property_read_bool(struct device *dev, -- cgit v1.2.3 From 1b0b6cc8030d08d2a24e9e5f85dc36c5a58200ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 24 Nov 2021 00:27:01 +0100 Subject: power: supply: add charge_behaviour attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This a revised version of "[RFC] add standardized attributes for force_discharge and inhibit_charge" [0], incorporating discussion results. The biggest change is the switch from two boolean attributes to a single enum attribute. [0] https://lore.kernel.org/platform-driver-x86/21569a89-8303-8573-05fb-c2fec29983d1@gmail.com/ Signed-off-by: Thomas Weißschuh Acked-by: Sebastian Reichel Link: https://lore.kernel.org/r/20211123232704.25394-2-linux@weissschuh.net Signed-off-by: Hans de Goede --- include/linux/power_supply.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 9ca1f120a211..70c333e86293 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -132,6 +132,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD, /* in percents! */ POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD, /* in percents! */ + POWER_SUPPLY_PROP_CHARGE_BEHAVIOUR, POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT, POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT, POWER_SUPPLY_PROP_INPUT_POWER_LIMIT, @@ -202,6 +203,12 @@ enum power_supply_usb_type { POWER_SUPPLY_USB_TYPE_APPLE_BRICK_ID, /* Apple Charging Method */ }; +enum power_supply_charge_behaviour { + POWER_SUPPLY_CHARGE_BEHAVIOUR_AUTO = 0, + POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE, + POWER_SUPPLY_CHARGE_BEHAVIOUR_FORCE_DISCHARGE, +}; + enum power_supply_notifier_events { PSY_EVENT_PROP_CHANGED, }; -- cgit v1.2.3 From 539b9c94ac83563842a27e8cc3de5164b15c4de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 24 Nov 2021 00:27:02 +0100 Subject: power: supply: add helpers for charge_behaviour sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These helper functions can be used by drivers to implement their own sysfs-attributes. This is useful for ACPI-drivers extending the default ACPI-battery with their own charge_behaviour attributes. Signed-off-by: Thomas Weißschuh Acked-by: Sebastian Reichel Link: https://lore.kernel.org/r/20211123232704.25394-3-linux@weissschuh.net Signed-off-by: Hans de Goede --- include/linux/power_supply.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 70c333e86293..71f0379c2af8 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -546,4 +546,13 @@ static inline void power_supply_remove_hwmon_sysfs(struct power_supply *psy) {} #endif +#ifdef CONFIG_SYSFS +ssize_t power_supply_charge_behaviour_show(struct device *dev, + unsigned int available_behaviours, + enum power_supply_charge_behaviour behaviour, + char *buf); + +int power_supply_charge_behaviour_parse(unsigned int available_behaviours, const char *buf); +#endif + #endif /* __LINUX_POWER_SUPPLY_H__ */ -- cgit v1.2.3 From 37ae5a0f5287a52cf51242e76ccf198d02ffe495 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 18 Dec 2021 18:41:56 +0900 Subject: block: use "unsigned long" for blk_validate_block_size(). Since lo_simple_ioctl(LOOP_SET_BLOCK_SIZE) and ioctl(NBD_SET_BLKSIZE) pass user-controlled "unsigned long arg" to blk_validate_block_size(), "unsigned long" should be used for validation. Signed-off-by: Tetsuo Handa Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/9ecbf057-4375-c2db-ab53-e4cc0dff953d@i-love.sakura.ne.jp Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c80cfaefc0a8..bb5fb7282e6e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -45,7 +45,7 @@ struct blk_crypto_profile; */ #define BLKCG_MAX_POLS 6 -static inline int blk_validate_block_size(unsigned int bsize) +static inline int blk_validate_block_size(unsigned long bsize) { if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) return -EINVAL; -- cgit v1.2.3 From 5f174ec3c1d62013f86db6597249174d8cb227b2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Sep 2021 21:32:49 +0000 Subject: logic_io instance of iounmap() needs volatile on argument ... same as the rest of implementations Signed-off-by: Al Viro Signed-off-by: Richard Weinberger --- include/asm-generic/logic_io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/logic_io.h b/include/asm-generic/logic_io.h index a53116b8c57e..8a59b6e567df 100644 --- a/include/asm-generic/logic_io.h +++ b/include/asm-generic/logic_io.h @@ -34,7 +34,7 @@ void __iomem *ioremap(phys_addr_t offset, size_t size); #define iounmap iounmap -void iounmap(void __iomem *addr); +void iounmap(void volatile __iomem *addr); #define __raw_readb __raw_readb u8 __raw_readb(const volatile void __iomem *addr); -- cgit v1.2.3 From cb8747b7d2a9e3d687a19a007575071d4b71cd05 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Mon, 15 Nov 2021 14:46:47 +0100 Subject: uapi: Fix undefined __always_inline on non-glibc systems This macro is defined by glibc itself, which makes the issue go unnoticed on those systems. On non-glibc systems it causes build failures on several utilities and libraries, like bpftool and objtool. Fixes: 1d509f2a6ebc ("x86/insn: Support big endian cross-compiles") Fixes: 2d7ce0e8a704 ("tools/virtio: more stubs") Fixes: 3fb321fde22d ("selftests/net: ipv6 flowlabel") Fixes: 50b3ed57dee9 ("selftests/bpf: test bpf flow dissection") Fixes: 9cacf81f8161 ("bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE") Fixes: a4b2061242ec ("tools include uapi: Grab a copy of linux/in.h") Fixes: b12d6ec09730 ("bpf: btf: add btf print functionality") Fixes: c0dd967818a2 ("tools, include: Grab a copy of linux/erspan.h") Fixes: c4b6014e8bb0 ("tools: Add copy of perf_event.h to tools/include/linux/") Signed-off-by: Ismael Luceno Acked-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211115134647.1921-1-ismael@iodev.co.uk Cc: Martin Schwidefsky Cc: Vasily Gorbik --- include/uapi/linux/byteorder/big_endian.h | 1 + include/uapi/linux/byteorder/little_endian.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h index 2199adc6a6c2..80aa5c41a763 100644 --- a/include/uapi/linux/byteorder/big_endian.h +++ b/include/uapi/linux/byteorder/big_endian.h @@ -9,6 +9,7 @@ #define __BIG_ENDIAN_BITFIELD #endif +#include #include #include diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h index 601c904fd5cd..cd98982e7523 100644 --- a/include/uapi/linux/byteorder/little_endian.h +++ b/include/uapi/linux/byteorder/little_endian.h @@ -9,6 +9,7 @@ #define __LITTLE_ENDIAN_BITFIELD #endif +#include #include #include -- cgit v1.2.3 From dcce50e6cc4d86a63dc0a9a6ee7d4f948ccd53a1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 8 Nov 2021 14:35:59 -0800 Subject: compiler.h: Fix annotation macro misplacement with Clang When building with Clang and CONFIG_TRACE_BRANCH_PROFILING, there are a lot of unreachable warnings, like: arch/x86/kernel/traps.o: warning: objtool: handle_xfd_event()+0x134: unreachable instruction Without an input to the inline asm, 'volatile' is ignored for some reason and Clang feels free to move the reachable() annotation away from its intended location. Fix that by re-adding the counter value to the inputs. Fixes: f1069a8756b9 ("compiler.h: Avoid using inline asm operand modifiers") Fixes: c199f64ff93c ("instrumentation.h: Avoid using inline asm operand modifiers") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/0417e96909b97a406323409210de7bf13df0b170.1636410380.git.jpoimboe@redhat.com Cc: Peter Zijlstra Cc: x86@kernel.org Cc: Vasily Gorbik Cc: Miroslav Benes --- include/linux/compiler.h | 4 ++-- include/linux/instrumentation.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 3d5af56337bd..429dcebe2b99 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -121,7 +121,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.reachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_reachable() __annotate_reachable(__COUNTER__) @@ -129,7 +129,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index fa2cd8c63dcc..24359b4a9605 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -11,7 +11,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_begin\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -50,7 +50,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_end\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) #else -- cgit v1.2.3 From 47402385d0b18ba46c6bfc4cff073c9359571fe4 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 9 Dec 2021 12:09:24 +0200 Subject: devlink: Add new "io_eq_size" generic device param Add new device generic parameter to determine the size of the I/O completion EQs. For example, to reduce I/O EQ size to 64, execute: $ devlink dev param set pci/0000:06:00.0 \ name io_eq_size value 64 cmode driverinit $ devlink dev reload pci/0000:06:00.0 Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 3276a29f2b81..b5f4acd0e0cd 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -459,6 +459,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -511,6 +512,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp" #define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size" +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From 0b5705ebc355e79b71bf946058acec6820e27869 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 9 Dec 2021 12:09:26 +0200 Subject: devlink: Add new "event_eq_size" generic device param Add new device generic parameter to determine the size of the asynchronous control events EQ. For example, to reduce event EQ size to 64, execute: $ devlink dev param set pci/0000:06:00.0 \ name event_eq_size value 64 cmode driverinit $ devlink dev reload pci/0000:06:00.0 Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index b5f4acd0e0cd..8d5349d2fb68 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -460,6 +460,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -515,6 +516,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size" #define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size" +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From f857acfc457ea63fa5b862d77f055665d863acfe Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 17 Nov 2021 14:53:48 -0700 Subject: lib/scatterlist: cleanup macros into static inline functions Convert the sg_is_chain(), sg_is_last() and sg_chain_ptr() macros into static inline functions. There's no reason for these to be macros and static inline are generally preferred these days. Also introduce the SG_PAGE_LINK_MASK define so the P2PDMA work, which is adding another bit to this mask, can do so more easily. Suggested-by: Jason Gunthorpe Signed-off-by: Logan Gunthorpe Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/scatterlist.h | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 266754a55327..7ff9d6386c12 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -69,10 +69,27 @@ struct sg_append_table { * a valid sg entry, or whether it points to the start of a new scatterlist. * Those low bits are there for everyone! (thanks mason :-) */ -#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN) -#define sg_is_last(sg) ((sg)->page_link & SG_END) -#define sg_chain_ptr(sg) \ - ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END))) +#define SG_PAGE_LINK_MASK (SG_CHAIN | SG_END) + +static inline unsigned int __sg_flags(struct scatterlist *sg) +{ + return sg->page_link & SG_PAGE_LINK_MASK; +} + +static inline struct scatterlist *sg_chain_ptr(struct scatterlist *sg) +{ + return (struct scatterlist *)(sg->page_link & ~SG_PAGE_LINK_MASK); +} + +static inline bool sg_is_chain(struct scatterlist *sg) +{ + return __sg_flags(sg) & SG_CHAIN; +} + +static inline bool sg_is_last(struct scatterlist *sg) +{ + return __sg_flags(sg) & SG_END; +} /** * sg_assign_page - Assign a given page to an SG entry @@ -92,7 +109,7 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) * In order for the low bit stealing approach to work, pages * must be aligned at a 32-bit boundary as a minimum. */ - BUG_ON((unsigned long) page & (SG_CHAIN | SG_END)); + BUG_ON((unsigned long)page & SG_PAGE_LINK_MASK); #ifdef CONFIG_DEBUG_SG BUG_ON(sg_is_chain(sg)); #endif @@ -126,7 +143,7 @@ static inline struct page *sg_page(struct scatterlist *sg) #ifdef CONFIG_DEBUG_SG BUG_ON(sg_is_chain(sg)); #endif - return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END)); + return (struct page *)((sg)->page_link & ~SG_PAGE_LINK_MASK); } /** -- cgit v1.2.3 From d5624bb29f49b849ac8d1e9783dbf9c65cf33457 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 21 Dec 2021 11:55:56 +0800 Subject: asm-generic: introduce io_stop_wc() and add implementation for ARM64 For memory accesses with write-combining attributes (e.g. those returned by ioremap_wc()), the CPU may wait for prior accesses to be merged with subsequent ones. But in some situation, such wait is bad for the performance. We introduce io_stop_wc() to prevent the merging of write-combining memory accesses before this macro with those after it. We add implementation for ARM64 using DGH instruction and provide NOP implementation for other architectures. Signed-off-by: Xiongfeng Wang Suggested-by: Will Deacon Suggested-by: Catalin Marinas Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211221035556.60346-1-wangxiongfeng2@huawei.com Signed-off-by: Catalin Marinas --- include/asm-generic/barrier.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 640f09479bdf..4c2c1b830344 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -251,5 +251,16 @@ do { \ #define pmem_wmb() wmb() #endif +/* + * ioremap_wc() maps I/O memory as memory with write-combining attributes. For + * this kind of memory accesses, the CPU may wait for prior accesses to be + * merged with subsequent ones. In some situation, such wait is bad for the + * performance. io_stop_wc() can be used to prevent the merging of + * write-combining memory accesses before this macro with those after it. + */ +#ifndef io_stop_wc +#define io_stop_wc do { } while (0) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ -- cgit v1.2.3 From 80b3485f7d7bdb2468f2a9d6a346a1132d248309 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 7 Dec 2021 17:50:10 -0800 Subject: PCI: Add #defines for accessing PCIe DVSEC fields Add #defines for accessing Vendor ID, Revision, Length, and ID offsets in the Designated Vendor Specific Extended Capability (DVSEC). Defined in PCIe r5.0, sec 7.9.6. Reviewed-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211208015015.891275-2-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/pci_regs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index ff6ccbc6efe9..318f3f1f9e92 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1086,7 +1086,11 @@ /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ #define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +#define PCI_DVSEC_HEADER1_VID(x) ((x) & 0xffff) +#define PCI_DVSEC_HEADER1_REV(x) (((x) >> 16) & 0xf) +#define PCI_DVSEC_HEADER1_LEN(x) (((x) >> 20) & 0xfff) #define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ +#define PCI_DVSEC_HEADER2_ID(x) ((x) & 0xffff) /* Data Link Feature */ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ -- cgit v1.2.3 From 365481e42a8a95c55e43e8cc236138718e762e7b Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 7 Dec 2021 17:50:11 -0800 Subject: driver core: auxiliary bus: Add driver data helpers Adds get/set driver data helpers for auxiliary devices. Reviewed-by: Mark Gross Reviewed-by: Andy Shevchenko Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211208015015.891275-3-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index e6d8b5c16226..de21d9d24a95 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -188,6 +188,16 @@ struct auxiliary_driver { const struct auxiliary_device_id *id_table; }; +static inline void *auxiliary_get_drvdata(struct auxiliary_device *auxdev) +{ + return dev_get_drvdata(&auxdev->dev); +} + +static inline void auxiliary_set_drvdata(struct auxiliary_device *auxdev, void *data) +{ + dev_set_drvdata(&auxdev->dev, data); +} + static inline struct auxiliary_device *to_auxiliary_dev(struct device *dev) { return container_of(dev, struct auxiliary_device, dev); -- cgit v1.2.3 From b398123bff3bcbc1facb0f29bf6e7b9f1bc55931 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Wed, 15 Dec 2021 10:13:48 +0800 Subject: efi: apply memblock cap after memblock_add() On arm64, during kdump kernel saves vmcore, it runs into the following bug: ... [ 15.148919] usercopy: Kernel memory exposure attempt detected from SLUB object 'kmem_cache_node' (offset 0, size 4096)! [ 15.159707] ------------[ cut here ]------------ [ 15.164311] kernel BUG at mm/usercopy.c:99! [ 15.168482] Internal error: Oops - BUG: 0 [#1] SMP [ 15.173261] Modules linked in: xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce sbsa_gwdt ast i2c_algo_bit drm_vram_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec drm_ttm_helper ttm drm nvme nvme_core xgene_hwmon i2c_designware_platform i2c_designware_core dm_mirror dm_region_hash dm_log dm_mod overlay squashfs zstd_decompress loop [ 15.206186] CPU: 0 PID: 542 Comm: cp Not tainted 5.16.0-rc4 #1 [ 15.212006] Hardware name: GIGABYTE R272-P30-JG/MP32-AR0-JG, BIOS F12 (SCP: 1.5.20210426) 05/13/2021 [ 15.221125] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 15.228073] pc : usercopy_abort+0x9c/0xa0 [ 15.232074] lr : usercopy_abort+0x9c/0xa0 [ 15.236070] sp : ffff8000121abba0 [ 15.239371] x29: ffff8000121abbb0 x28: 0000000000003000 x27: 0000000000000000 [ 15.246494] x26: 0000000080000400 x25: 0000ffff885c7000 x24: 0000000000000000 [ 15.253617] x23: 000007ff80400000 x22: ffff07ff80401000 x21: 0000000000000001 [ 15.260739] x20: 0000000000001000 x19: ffff07ff80400000 x18: ffffffffffffffff [ 15.267861] x17: 656a626f2042554c x16: 53206d6f72662064 x15: 6574636574656420 [ 15.274983] x14: 74706d6574746120 x13: 2129363930342065 x12: 7a6973202c302074 [ 15.282105] x11: ffffc8b041d1b148 x10: 00000000ffff8000 x9 : ffffc8b04012812c [ 15.289228] x8 : 00000000ffff7fff x7 : ffffc8b041d1b148 x6 : 0000000000000000 [ 15.296349] x5 : 0000000000000000 x4 : 0000000000007fff x3 : 0000000000000000 [ 15.303471] x2 : 0000000000000000 x1 : ffff07ff8c064800 x0 : 000000000000006b [ 15.310593] Call trace: [ 15.313027] usercopy_abort+0x9c/0xa0 [ 15.316677] __check_heap_object+0xd4/0xf0 [ 15.320762] __check_object_size.part.0+0x160/0x1e0 [ 15.325628] __check_object_size+0x2c/0x40 [ 15.329711] copy_oldmem_page+0x7c/0x140 [ 15.333623] read_from_oldmem.part.0+0xfc/0x1c0 [ 15.338142] __read_vmcore.constprop.0+0x23c/0x350 [ 15.342920] read_vmcore+0x28/0x34 [ 15.346309] proc_reg_read+0xb4/0xf0 [ 15.349871] vfs_read+0xb8/0x1f0 [ 15.353088] ksys_read+0x74/0x100 [ 15.356390] __arm64_sys_read+0x28/0x34 ... This bug introduced by commit b261dba2fdb2 ("arm64: kdump: Remove custom linux,usable-memory-range handling"), which moves memblock_cap_memory_range() to fdt, but it breaches the rules that memblock_cap_memory_range() should come after memblock_add() etc as said in commit e888fa7bb882 ("memblock: Check memory add/cap ordering"). As a consequence, the virtual address set up by copy_oldmem_page() does not bail out from the test of virt_addr_valid() in check_heap_object(), and finally hits the BUG_ON(). Since memblock allocator has no idea about when the memblock is fully populated, while efi_init() is aware, so tackling this issue by calling the interface early_init_dt_check_for_usable_mem_range() exposed by of/fdt. Fixes: b261dba2fdb2 ("arm64: kdump: Remove custom linux,usable-memory-range handling") Signed-off-by: Pingfan Liu Cc: Rob Herring Cc: Zhen Lei Cc: Catalin Marinas Cc: Will Deacon Cc: Andrew Morton Cc: Mike Rapoport Cc: Geert Uytterhoeven Cc: Frank Rowand Cc: Ard Biesheuvel Cc: Nick Terrell Cc: linux-arm-kernel@lists.infradead.org To: devicetree@vger.kernel.org To: linux-efi@vger.kernel.org Acked-by: Ard Biesheuvel Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20211215021348.8766-1-kernelfans@gmail.com --- include/linux/of_fdt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index cf48983d3c86..ad09beb6d13c 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -62,6 +62,7 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); +extern void early_init_dt_check_for_usable_mem_range(void); extern int early_init_dt_scan_chosen_stdout(void); extern void early_init_fdt_scan_reserved_mem(void); extern void early_init_fdt_reserve_self(void); @@ -86,6 +87,7 @@ extern void unflatten_and_copy_device_tree(void); extern void early_init_devtree(void *); extern void early_get_first_memblock_info(void *, phys_addr_t *); #else /* CONFIG_OF_EARLY_FLATTREE */ +static inline void early_init_dt_check_for_usable_mem_range(void) {} static inline int early_init_dt_scan_chosen_stdout(void) { return -ENODEV; } static inline void early_init_fdt_scan_reserved_mem(void) {} static inline void early_init_fdt_reserve_self(void) {} -- cgit v1.2.3 From f2f8115fe8b390af27d013411045bd712a812103 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 21 Dec 2021 15:17:56 +0200 Subject: memory: omap-gpmc: Use a compatible match table when checking for NAND controller As more compatibles can be added to the GPMC NAND controller driver use a compatible match table. Signed-off-by: Roger Quadros Acked-by: Miquel Raynal Link: https://lore.kernel.org/r/20211221131757.2030-4-rogerq@kernel.org [krzysztof: remove "is_nand" variable] Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/mtd-nand-omap2.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index de6ada739121..92f011805ad4 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -7,6 +7,7 @@ #define _MTD_NAND_OMAP2_H #include +#include #define GPMC_BCH_NUM_REMAINDER 8 @@ -61,4 +62,10 @@ struct gpmc_nand_regs { void __iomem *gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER]; }; -#endif + +static const struct of_device_id omap_nand_ids[] = { + { .compatible = "ti,omap2-nand", }, + {}, +}; + +#endif /* _MTD_NAND_OMAP2_H */ -- cgit v1.2.3 From d7f55471db2719629f773c2d6b5742a69595bfd3 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Fri, 17 Dec 2021 10:07:54 +0800 Subject: memblock: fix memblock_phys_alloc() section mismatch error Fix modpost Section mismatch error in memblock_phys_alloc() [...] WARNING: modpost: vmlinux.o(.text.unlikely+0x1dcc): Section mismatch in reference from the function memblock_phys_alloc() to the function .init.text:memblock_phys_alloc_range() The function memblock_phys_alloc() references the function __init memblock_phys_alloc_range(). This is often because memblock_phys_alloc lacks a __init annotation or the annotation of memblock_phys_alloc_range is wrong. ERROR: modpost: Section mismatches detected. Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them. [...] memblock_phys_alloc() is a one-line wrapper, make it __always_inline to avoid these section mismatches. Reported-by: k2ci Suggested-by: Mike Rapoport Signed-off-by: Jackie Liu [rppt: slightly massaged changelog ] Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/20211217020754.2874872-1-liu.yun@linux.dev --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8adcf1fa8096..9dc7cb239d21 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -405,8 +405,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size, phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, - phys_addr_t align) +static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, + phys_addr_t align) { return memblock_phys_alloc_range(size, align, 0, MEMBLOCK_ALLOC_ACCESSIBLE); -- cgit v1.2.3 From 744451c162a514044a912cbbd64b7a386035cc5b Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 17 Dec 2021 16:28:09 +0100 Subject: Bluetooth: hci_sync: Push sync command cancellation to workqueue syzbot reported that hci_cmd_sync_cancel may sleep from the wrong context. To avoid this, create a new work item that pushes the relevant parts into a different context. Note that we keep the old implementation with the name __hci_cmd_sync_cancel as the sleeping behaviour is desired in some cases. Reported-and-tested-by: syzbot+485cc00ea7cf41dfdbf1@syzkaller.appspotmail.com Fixes: c97a747efc93 ("Bluetooth: btusb: Cancel sync commands for certain URB errors") Signed-off-by: Benjamin Berg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/hci_sync.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4d69dcfebd63..6509109c2413 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -480,6 +480,7 @@ struct hci_dev { struct work_struct cmd_sync_work; struct list_head cmd_sync_work_list; struct mutex cmd_sync_work_lock; + struct work_struct cmd_sync_cancel_work; __u16 discov_timeout; struct delayed_work discov_off; diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index f4034bf8f1ce..435674cf388e 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -38,6 +38,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); -- cgit v1.2.3 From 8e8b92ee60de5341e9db83c11f75a525e555e2b3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Dec 2021 12:21:56 -0800 Subject: Bluetooth: hci_sync: Add hci_le_create_conn_sync This adds hci_le_create_conn_sync and make hci_le_connect use it instead of queueing multiple commands which may conflict with the likes of hci_update_passive_scan which uses hci_cmd_sync_queue. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 +-- include/net/bluetooth/hci_sync.h | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6509109c2413..1ab94960764e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1121,8 +1121,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role, - bdaddr_t *direct_rpa); + u16 conn_timeout, u8 role); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 435674cf388e..2492e3b46a8f 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -102,3 +102,7 @@ int hci_stop_discovery_sync(struct hci_dev *hdev); int hci_suspend_sync(struct hci_dev *hdev); int hci_resume_sync(struct hci_dev *hdev); + +struct hci_conn; + +int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn); -- cgit v1.2.3 From 85b56857e194635b772be8af1c7650535d5d112a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Dec 2021 12:21:57 -0800 Subject: Bluetooth: hci_sync: Add support for waiting specific LE subevents This adds support for waiting for specific LE subevents instead of command status which may only indicate that the commands is in progress and a different event is used to complete the operation. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 77906832353f..4b3d0b16c185 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -412,6 +412,7 @@ struct bt_skb_cb { #define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type #define hci_skb_expect(skb) bt_cb((skb))->expect #define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode +#define hci_skb_event(skb) bt_cb((skb))->hci.req_event #define hci_skb_sk(skb) bt_cb((skb))->hci.sk static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) -- cgit v1.2.3 From 4fc9857ab8c6cfe2152df3288c8cf3300b929f1a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Dec 2021 12:21:59 -0800 Subject: Bluetooth: hci_sync: Add check simultaneous roles support This attempts to check if the controller can act as both central and peripheral simultaneously and in case it does skip suspending advertising or in case of directed advertising don't fail if scanning. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1ab94960764e..586f69d084a2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -783,6 +783,12 @@ extern struct mutex hci_cb_list_lock; hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ } while (0) +#define hci_dev_le_state_simultaneous(hdev) \ + (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (hdev->le_states[4] & 0x08) && /* Central */ \ + (hdev->le_states[4] & 0x40) && /* Peripheral */ \ + (hdev->le_states[3] & 0x10)) /* Simultaneous */ + /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); -- cgit v1.2.3 From 76d0685bbac8ae017e5d12eba25fb3c4f0ec77ac Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Dec 2021 12:22:00 -0800 Subject: Bluetooth: MGMT: Fix LE simultaneous roles UUID if not supported If controller/driver don't support LE simultaneous roles its UUID shall be omitted when responding to MGMT_OP_READ_EXP_FEATURES_INFO. This also rework the support introducing HCI_LE_SIMULTANEOUS_ROLES flag so it can be detected when userspace wants to use or not. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 7f5f00ff53da..e2b06bb79e2e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -341,6 +341,7 @@ enum { HCI_FORCE_NO_MITM, HCI_QUALITY_REPORT, HCI_OFFLOAD_CODECS_ENABLED, + HCI_LE_SIMULTANEOUS_ROLES, __HCI_NUM_FLAGS, }; -- cgit v1.2.3 From 15fcb1031178f2a42425c2993b2ec7bb894c04d6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 21 Dec 2021 11:39:40 -0800 Subject: codel: remove unnecessary sock.h include Since sock.h is modified relatively often (60 times in the last 12 months) it seems worthwhile to decrease the incremental build work. CoDel's header includes net/inet_ecn.h which in turn includes net/sock.h. codel.h is itself included by mac80211 which is included by much of the WiFi stack and drivers. Removing the net/inet_ecn.h include from CoDel breaks the dependecy between WiFi and sock.h. Commit d068ca2ae2e6 ("codel: split into multiple files") moved all the code which actually needs ECN helpers out to net/codel_impl.h, the include can be moved there as well. This decreases the incremental build size after touching sock.h from 4999 objects to 4051 objects. Fix unmasked missing includes in WiFi drivers. Acked-by: Kalle Valo Link: https://lore.kernel.org/r/20211221193941.3805147-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/codel.h | 1 - include/net/codel_impl.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/codel.h b/include/net/codel.h index a6c9e34e62b8..d74dd8fda54e 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -45,7 +45,6 @@ #include #include #include -#include /* Controlling Queue Delay (CoDel) algorithm * ========================================= diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 137d40d8cbeb..78a27ac73070 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -49,6 +49,8 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include + static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); -- cgit v1.2.3 From e6e5904455815626b711c7d48cacd253f4d72f84 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 21 Dec 2021 11:39:41 -0800 Subject: codel: remove unnecessary pkt_sched.h include Commit d068ca2ae2e6 ("codel: split into multiple files") moved all Qdisc-related code to codel_qdisc.h, move the include of pkt_sched.h as well. This is similar to the previous commit, although we don't care as much about incremental builds after pkt_sched.h was touched itself it is included by net/sch_generic.h which is modified ~20 times a year. This decreases the incremental build size after touching pkt_sched.h from 1592 to 617 objects. Fix unmasked missing includes in WiFi drivers. Acked-by: Kalle Valo Link: https://lore.kernel.org/r/20211221193941.3805147-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/codel.h | 1 - include/net/codel_qdisc.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/codel.h b/include/net/codel.h index d74dd8fda54e..5fed2f16cb8d 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -44,7 +44,6 @@ #include #include #include -#include /* Controlling Queue Delay (CoDel) algorithm * ========================================= diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h index 098630f83a55..58b6d0ebea10 100644 --- a/include/net/codel_qdisc.h +++ b/include/net/codel_qdisc.h @@ -49,6 +49,8 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include + /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ struct codel_skb_cb { codel_time_t enqueue_time; -- cgit v1.2.3 From 6fd3c510ee4b37f2f9fe3d3cafbfa459e15c5e11 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 22 Dec 2021 13:15:32 -0800 Subject: bio.h: fix kernel-doc warnings Fix all kernel-doc warnings in : include/linux/bio.h:136: warning: Function parameter or member 'nbytes' not described in 'bio_advance' include/linux/bio.h:136: warning: Excess function parameter 'bytes' description in 'bio_advance' include/linux/bio.h:391: warning: No description found for return value of 'bio_next_split' Signed-off-by: Randy Dunlap Cc: Kent Overstreet Cc: Jens Axboe Cc: linux-block@vger.kernel.org Link: https://lore.kernel.org/r/20211222211532.24060-1-rdunlap@infradead.org Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index fe6bdfbbef66..0a41efe02208 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -124,7 +124,7 @@ void __bio_advance(struct bio *, unsigned bytes); /** * bio_advance - increment/complete a bio by some number of bytes * @bio: bio to advance - * @bytes: number of bytes to complete + * @nbytes: number of bytes to complete * * This updates bi_sector, bi_size and bi_idx; if the number of bytes to * complete doesn't align with a bvec boundary, then bv_len and bv_offset will @@ -332,7 +332,7 @@ extern struct bio *bio_split(struct bio *bio, int sectors, * @gfp: gfp mask * @bs: bio set to allocate from * - * Returns a bio representing the next @sectors of @bio - if the bio is smaller + * Return: a bio representing the next @sectors of @bio - if the bio is smaller * than @sectors, returns the original bio unchanged. */ static inline struct bio *bio_next_split(struct bio *bio, int sectors, -- cgit v1.2.3 From a16c7246368db8935652c805bc446928d0e1c0aa Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 22 Dec 2021 13:52:39 -0800 Subject: block: remove unnecessary trailing '\' While harmless, the blank line is certainly not intended to be part of the rq_list_for_each() macro. Remove it. Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20211222215239.1768164-1-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb5fb7282e6e..22746b2d6825 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1363,7 +1363,7 @@ struct io_comp_batch { }) #define rq_list_for_each(listptr, pos) \ - for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) \ + for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) #define rq_list_next(rq) (rq)->rq_next #define rq_list_empty(list) ((list) == (struct request *) NULL) -- cgit v1.2.3 From 023223dfbfb34fcc9b7dd41e21fbf9a5d5237989 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Dec 2021 20:37:34 +0100 Subject: netfilter: nf_tables: make counter support built-in Make counter support built-in to allow for direct call in case of CONFIG_RETPOLINE. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 0fa5a6d98a00..b6fb1fdff9b2 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -7,6 +7,7 @@ extern struct nft_expr_type nft_imm_type; extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_counter_type; extern struct nft_expr_type nft_lookup_type; extern struct nft_expr_type nft_bitwise_type; extern struct nft_expr_type nft_byteorder_type; @@ -21,6 +22,7 @@ extern struct nft_expr_type nft_last_type; #ifdef CONFIG_NETWORK_SECMARK extern struct nft_object_type nft_secmark_obj_type; #endif +extern struct nft_object_type nft_counter_obj_type; int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); @@ -120,6 +122,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); +void nft_counter_init_seqcount(void); + struct nft_expr; struct nft_regs; struct nft_pktinfo; @@ -143,4 +147,6 @@ void nft_dynset_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); #endif /* _NET_NF_TABLES_CORE_H */ -- cgit v1.2.3 From 4a6fbdd801e882ee6ca5cdfdc3374f0ae263174c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Dec 2021 11:29:56 +0100 Subject: netfilter: conntrack: tag conntracks picked up in local out hook This allows to identify flows that originate from local machine in a followup patch. It would be possible to make this a ->status bit instead. For now I did not do that yet because I don't have a use-case for exposing this info to userspace. If one comes up the toggle can be replaced with a status bit. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d24b0a34c8f0..871489df63c6 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -95,6 +95,7 @@ struct nf_conn { unsigned long status; u16 cpu; + u16 local_origin:1; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) -- cgit v1.2.3 From fbefe22811c3140a686e407e114789ebf328a9a2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 20 Dec 2021 19:21:24 +0800 Subject: scsi: libsas: Don't always drain event workqueue for HA resume For the hisi_sas driver, if a directly attached disk is removed during suspend, a hang will occur in the resume process: The background is that in commit 16fd4a7c5917 ("scsi: hisi_sas: Add device link between SCSI devices and hisi_hba"), it is ensured that the HBA device cannot be runtime suspended when any SCSI device associated is active. Other drivers which use libsas don't worry about this as none support runtime suspend. The mentioned hang occurs when an disk is removed during suspend. In the removal process - from PHYE_RESUME_TIMEOUT event processing - we call into scsi_remove_device(), which is being processed in the HA event workqueue. Here we wait for all suppliers of the SCSI device to resume, which includes the HBA device (from the above commit). However the HBA device cannot resume, as it is waiting for the PHYE_RESUME_TIMEOUT to be processed (from calling sas_resume_ha() -> sas_drain_work()). This is the deadlock. There does not appear to be any need for the sas_drain_work() to be called at all in sas_resume_ha() as it is not syncing against anything, so allow LLDDs to avoid this by providing a variant of sas_resume_ha() which does "sync", i.e. doesn't drain the event workqueue. Link: https://lore.kernel.org/r/1639999298-244569-2-git-send-email-chenxiang66@hisilicon.com Signed-off-by: John Garry Signed-off-by: Xiang Chen Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 79e4903bd414..a795a2d9e5b1 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -660,6 +660,7 @@ extern int sas_register_ha(struct sas_ha_struct *); extern int sas_unregister_ha(struct sas_ha_struct *); extern void sas_prep_resume_ha(struct sas_ha_struct *sas_ha); extern void sas_resume_ha(struct sas_ha_struct *sas_ha); +extern void sas_resume_ha_no_sync(struct sas_ha_struct *sas_ha); extern void sas_suspend_ha(struct sas_ha_struct *sas_ha); int sas_set_phy_speed(struct sas_phy *phy, struct sas_phy_linkrates *rates); -- cgit v1.2.3 From 6e1fcab00a23f7fe9f4fe9704905a790efa1eeab Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 20 Dec 2021 19:21:26 +0800 Subject: scsi: block: pm: Always set request queue runtime active in blk_post_runtime_resume() John Garry reported a deadlock that occurs when trying to access a runtime-suspended SATA device. For obscure reasons, the rescan procedure causes the link to be hard-reset, which disconnects the device. The rescan tries to carry out a runtime resume when accessing the device. scsi_rescan_device() holds the SCSI device lock and won't release it until it can put commands onto the device's block queue. This can't happen until the queue is successfully runtime-resumed or the device is unregistered. But the runtime resume fails because the device is disconnected, and __scsi_remove_device() can't do the unregistration because it can't get the device lock. The best way to resolve this deadlock appears to be to allow the block queue to start running again even after an unsuccessful runtime resume. The idea is that the driver or the SCSI error handler will need to be able to use the queue to resolve the runtime resume failure. This patch removes the err argument to blk_post_runtime_resume() and makes the routine act as though the resume was successful always. This fixes the deadlock. Link: https://lore.kernel.org/r/1639999298-244569-4-git-send-email-chenxiang66@hisilicon.com Fixes: e27829dc92e5 ("scsi: serialize ->rescan against ->remove") Reported-and-tested-by: John Garry Reviewed-by: Bart Van Assche Signed-off-by: Alan Stern Signed-off-by: Xiang Chen Signed-off-by: Martin K. Petersen --- include/linux/blk-pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h index b80c65aba249..2580e05a8ab6 100644 --- a/include/linux/blk-pm.h +++ b/include/linux/blk-pm.h @@ -14,7 +14,7 @@ extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); extern int blk_pre_runtime_suspend(struct request_queue *q); extern void blk_post_runtime_suspend(struct request_queue *q, int err); extern void blk_pre_runtime_resume(struct request_queue *q); -extern void blk_post_runtime_resume(struct request_queue *q, int err); +extern void blk_post_runtime_resume(struct request_queue *q); extern void blk_set_runtime_active(struct request_queue *q); #else static inline void blk_pm_runtime_init(struct request_queue *q, -- cgit v1.2.3 From 4ea775abbb5c50c26edbf043d5a2ae7fde407f4a Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Mon, 20 Dec 2021 19:21:33 +0800 Subject: scsi: libsas: Add flag SAS_HA_RESUMING Add a flag SAS_HA_RESUMING and use it to indicate the state of resuming the host controller. Link: https://lore.kernel.org/r/1639999298-244569-11-git-send-email-chenxiang66@hisilicon.com Reviewed-by: John Garry Signed-off-by: Xiang Chen Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index a795a2d9e5b1..698f2032807b 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -356,6 +356,7 @@ enum sas_ha_state { SAS_HA_DRAINING, SAS_HA_ATA_EH_ACTIVE, SAS_HA_FROZEN, + SAS_HA_RESUMING, }; struct sas_ha_struct { -- cgit v1.2.3 From e6911affa416dc4e0c0b3f04cbe6b02ce13277f1 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Wed, 22 Dec 2021 17:06:58 +0800 Subject: xfrm: Add support for SM3 secure hash This patch allows IPsec to use SM3 HMAC authentication algorithm. Signed-off-by: Xu Jia Signed-off-by: Steffen Klassert --- include/uapi/linux/pfkeyv2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h index d65b11785260..798ba9ffd48c 100644 --- a/include/uapi/linux/pfkeyv2.h +++ b/include/uapi/linux/pfkeyv2.h @@ -309,6 +309,7 @@ struct sadb_x_filter { #define SADB_X_AALG_SHA2_512HMAC 7 #define SADB_X_AALG_RIPEMD160HMAC 8 #define SADB_X_AALG_AES_XCBC_MAC 9 +#define SADB_X_AALG_SM3_256HMAC 10 #define SADB_X_AALG_NULL 251 /* kame */ #define SADB_AALG_MAX 251 -- cgit v1.2.3 From 23b6a6df94c6ce434e7947cfad14b1640fb9f794 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Wed, 22 Dec 2021 17:06:59 +0800 Subject: xfrm: Add support for SM4 symmetric cipher algorithm This patch adds SM4 encryption algorithm entry to ealg_list. Signed-off-by: Xu Jia Signed-off-by: Steffen Klassert --- include/uapi/linux/pfkeyv2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h index 798ba9ffd48c..8abae1f6749c 100644 --- a/include/uapi/linux/pfkeyv2.h +++ b/include/uapi/linux/pfkeyv2.h @@ -330,6 +330,7 @@ struct sadb_x_filter { #define SADB_X_EALG_AES_GCM_ICV16 20 #define SADB_X_EALG_CAMELLIACBC 22 #define SADB_X_EALG_NULL_AES_GMAC 23 +#define SADB_X_EALG_SM4CBC 24 #define SADB_EALG_MAX 253 /* last EALG */ /* private allocations should use 249-255 (RFC2407) */ #define SADB_X_EALG_SERPENTCBC 252 /* draft-ietf-ipsec-ciph-aes-cbc-00 */ -- cgit v1.2.3 From 4e484b3e969b52effd95c17f7a86f39208b2ccf4 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 22 Dec 2021 14:11:18 +0100 Subject: xfrm: rate limit SA mapping change message to user space Kernel generates mapping change message, XFRM_MSG_MAPPING, when a source port chage is detected on a input state with UDP encapsulation set. Kernel generates a message for each IPsec packet with new source port. For a high speed flow per packet mapping change message can be excessive, and can overload the user space listener. Introduce rate limiting for XFRM_MSG_MAPPING message to the user space. The rate limiting is configurable via netlink, when adding a new SA or updating it. Use the new attribute XFRMA_MTIMER_THRESH in seconds. v1->v2 change: update xfrm_sa_len() v2->v3 changes: use u32 insted unsigned long to reduce size of struct xfrm_state fix xfrm_ompat size Reported-by: kernel test robot accept XFRM_MSG_MAPPING only when XFRMA_ENCAP is present Co-developed-by: Thomas Egerer Signed-off-by: Thomas Egerer Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 5 +++++ include/uapi/linux/xfrm.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2308210793a0..2589e4c0501b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -200,6 +200,11 @@ struct xfrm_state { struct xfrm_algo_aead *aead; const char *geniv; + /* mapping change rate limiting */ + __be16 new_mapping_sport; + u32 new_mapping; /* seconds */ + u32 mapping_maxage; /* seconds for input SA */ + /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; struct sock __rcu *encap_sk; diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index eda0426ec4c2..4e29d7851890 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -313,6 +313,7 @@ enum xfrm_attr_type_t { XFRMA_SET_MARK, /* __u32 */ XFRMA_SET_MARK_MASK, /* __u32 */ XFRMA_IF_ID, /* __u32 */ + XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ -- cgit v1.2.3 From 3d3b2f57d4447e6e9f4096ad01d0e4129f7bc7e9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 21 Dec 2021 16:40:30 -0500 Subject: sctp: move hlist_node and hashent out of sctp_ep_common Struct sctp_ep_common is included in both asoc and ep, but hlist_node and hashent are only needed by ep after asoc_hashtable was dropped by Commit b5eff7128366 ("sctp: drop the old assoc hashtable of sctp"). So it is better to move hlist_node and hashent from sctp_ep_common to sctp_endpoint, and it saves some space for each asoc. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 4 ++-- include/net/sctp/structs.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 189fdb9db162..33cf4789009f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -510,8 +510,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1); } -#define sctp_for_each_hentry(epb, head) \ - hlist_for_each_entry(epb, head, node) +#define sctp_for_each_hentry(ep, head) \ + hlist_for_each_entry(ep, head, node) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e7d1ed7a3dfb..8826b47e4ff7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1243,10 +1243,6 @@ enum sctp_endpoint_type { */ struct sctp_ep_common { - /* Fields to help us manage our entries in the hash tables. */ - struct hlist_node node; - int hashent; - /* Runtime type information. What kind of endpoint is this? */ enum sctp_endpoint_type type; @@ -1298,6 +1294,10 @@ struct sctp_endpoint { /* Common substructure for endpoint and association. */ struct sctp_ep_common base; + /* Fields to help us manage our entries in the hash tables. */ + struct hlist_node node; + int hashent; + /* Associations: A list of current associations and mappings * to the data consumers for each association. This * may be in the form of a hash table or other -- cgit v1.2.3 From 30be4551f9e26292599e666985119a5b559a2e4a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Dec 2021 18:32:56 +0200 Subject: wwan: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Reviewed-by: Sergey Ryazanov Signed-off-by: David S. Miller --- include/linux/wwan.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/wwan.h b/include/linux/wwan.h index e143c88bf4b0..afb3334ec8c5 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -4,12 +4,9 @@ #ifndef __WWAN_H #define __WWAN_H -#include -#include #include -#include -#include #include +#include /** * enum wwan_port_type - WWAN port types @@ -37,6 +34,10 @@ enum wwan_port_type { WWAN_PORT_UNKNOWN, }; +struct device; +struct file; +struct netlink_ext_ack; +struct sk_buff; struct wwan_port; /** struct wwan_port_ops - The WWAN port operations -- cgit v1.2.3 From 7da1d1ddd1f02e5de7497a0c849256912652fb6c Mon Sep 17 00:00:00 2001 From: Nick Child Date: Thu, 16 Dec 2021 17:00:35 -0500 Subject: cuda/pmu: Make find_via_cuda/pmu init functions Make `find_via_cuda` and `find_via_pmu` initialization functions. Previously, their definitions in `drivers/macintosh/via-cuda.h` include the `__init` attribute but their alternative definitions in `arch/powerpc/powermac/sectup./c` and prototypes in `include/linux/ cuda.h` and `include/linux/pmu.h` do not use the `__init` macro. Since, only initialization functions call `find_via_cuda` and `find_via_pmu` it is safe to label these functions with `__init`. Signed-off-by: Nick Child Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211216220035.605465-21-nick.child@ibm.com --- include/linux/cuda.h | 2 +- include/linux/pmu.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cuda.h b/include/linux/cuda.h index 45bfe9d61271..daf3e6f98444 100644 --- a/include/linux/cuda.h +++ b/include/linux/cuda.h @@ -12,7 +12,7 @@ #include -extern int find_via_cuda(void); +extern int __init find_via_cuda(void); extern int cuda_request(struct adb_request *req, void (*done)(struct adb_request *), int nbytes, ...); extern void cuda_poll(void); diff --git a/include/linux/pmu.h b/include/linux/pmu.h index 52453a24a24f..c677442d007c 100644 --- a/include/linux/pmu.h +++ b/include/linux/pmu.h @@ -13,7 +13,7 @@ #include -extern int find_via_pmu(void); +extern int __init find_via_pmu(void); extern int pmu_request(struct adb_request *req, void (*done)(struct adb_request *), int nbytes, ...); -- cgit v1.2.3 From fc179420fde3821c4d191e81b4f7b05c1dab87e2 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 23 Dec 2021 13:36:17 +0200 Subject: ASoC: SOF: Move the definition of enum snd_sof_fw_state to global header Move the enum snd_sof_fw_state to include/sound/sof.h to be accessible outside of the core SOF stack. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Kai Vehmanen Reviewed-by: Paul Olaru Link: https://lore.kernel.org/r/20211223113628.18582-10-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/sound/sof.h b/include/sound/sof.h index 23b374311d16..b9131c01eefd 100644 --- a/include/sound/sof.h +++ b/include/sound/sof.h @@ -17,6 +17,28 @@ struct snd_sof_dsp_ops; +/** + * enum snd_sof_fw_state - DSP firmware state definitions + * @SOF_FW_BOOT_NOT_STARTED: firmware boot is not yet started + * @SOF_FW_BOOT_PREPARE: preparing for boot (firmware loading for exaqmple) + * @SOF_FW_BOOT_IN_PROGRESS: firmware boot is in progress + * @SOF_FW_BOOT_FAILED: firmware boot failed + * @SOF_FW_BOOT_READY_FAILED: firmware booted but fw_ready op failed + * @SOF_FW_BOOT_READY_OK: firmware booted and fw_ready op passed + * @SOF_FW_BOOT_COMPLETE: firmware is booted up and functional + * @SOF_FW_CRASHED: firmware crashed after successful boot + */ +enum snd_sof_fw_state { + SOF_FW_BOOT_NOT_STARTED = 0, + SOF_FW_BOOT_PREPARE, + SOF_FW_BOOT_IN_PROGRESS, + SOF_FW_BOOT_FAILED, + SOF_FW_BOOT_READY_FAILED, + SOF_FW_BOOT_READY_OK, + SOF_FW_BOOT_COMPLETE, + SOF_FW_CRASHED, +}; + /* * SOF Platform data. */ -- cgit v1.2.3 From d41607d37c1385da799f9a2ddb10c460e573687e Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 23 Dec 2021 13:36:18 +0200 Subject: ASoC: SOF: Rename 'enum snd_sof_fw_state' to 'enum sof_fw_state' Since there is nothing SND about the firmware state, rename the enum from `snd_sof_fw_state` to simply `sof_fw_state` Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Kai Vehmanen Reviewed-by: Paul Olaru Link: https://lore.kernel.org/r/20211223113628.18582-11-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/sof.h b/include/sound/sof.h index b9131c01eefd..813680ab9aad 100644 --- a/include/sound/sof.h +++ b/include/sound/sof.h @@ -18,7 +18,7 @@ struct snd_sof_dsp_ops; /** - * enum snd_sof_fw_state - DSP firmware state definitions + * enum sof_fw_state - DSP firmware state definitions * @SOF_FW_BOOT_NOT_STARTED: firmware boot is not yet started * @SOF_FW_BOOT_PREPARE: preparing for boot (firmware loading for exaqmple) * @SOF_FW_BOOT_IN_PROGRESS: firmware boot is in progress @@ -28,7 +28,7 @@ struct snd_sof_dsp_ops; * @SOF_FW_BOOT_COMPLETE: firmware is booted up and functional * @SOF_FW_CRASHED: firmware crashed after successful boot */ -enum snd_sof_fw_state { +enum sof_fw_state { SOF_FW_BOOT_NOT_STARTED = 0, SOF_FW_BOOT_PREPARE, SOF_FW_BOOT_IN_PROGRESS, -- cgit v1.2.3 From 66b354064a35b6379963cba27b5d37a278fc9bd9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 23 Nov 2021 11:16:00 +0100 Subject: powercap/drivers/dtpm: Remove unused function definition The dtpm.h header file is exporting a function which is not implemented neither needed. Remove it. Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20211123101601.2433340-1-daniel.lezcano@linaro.org --- include/linux/dtpm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index 2890f6370eb9..d37e5d06a357 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -70,6 +70,4 @@ void dtpm_unregister(struct dtpm *dtpm); int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent); -int dtpm_register_cpu(struct dtpm *parent); - #endif -- cgit v1.2.3 From 9f01881beae9e40c87f3edfb81e87e973f85d272 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 16 Dec 2021 15:10:08 +0100 Subject: dt-bindings: gpio: Add Tegra241 support Add the port definitions for the main and AON GPIO controllers found on Tegra241 (Grace). Signed-off-by: Akhil R Signed-off-by: Thierry Reding Signed-off-by: Bartosz Golaszewski --- include/dt-bindings/gpio/tegra241-gpio.h | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/dt-bindings/gpio/tegra241-gpio.h (limited to 'include') diff --git a/include/dt-bindings/gpio/tegra241-gpio.h b/include/dt-bindings/gpio/tegra241-gpio.h new file mode 100644 index 000000000000..80cee3016be6 --- /dev/null +++ b/include/dt-bindings/gpio/tegra241-gpio.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. */ + +/* + * This header provides constants for the nvidia,tegra241-gpio DT binding. + * + * The first cell in Tegra's GPIO specifier is the GPIO ID. The macros below + * provide names for this. + * + * The second cell contains standard flag values specified in gpio.h. + */ + +#ifndef _DT_BINDINGS_GPIO_TEGRA241_GPIO_H +#define _DT_BINDINGS_GPIO_TEGRA241_GPIO_H + +#include + +/* GPIOs implemented by main GPIO controller */ +#define TEGRA241_MAIN_GPIO_PORT_A 0 +#define TEGRA241_MAIN_GPIO_PORT_B 1 +#define TEGRA241_MAIN_GPIO_PORT_C 2 +#define TEGRA241_MAIN_GPIO_PORT_D 3 +#define TEGRA241_MAIN_GPIO_PORT_E 4 +#define TEGRA241_MAIN_GPIO_PORT_F 5 +#define TEGRA241_MAIN_GPIO_PORT_G 6 +#define TEGRA241_MAIN_GPIO_PORT_H 7 +#define TEGRA241_MAIN_GPIO_PORT_I 8 +#define TEGRA241_MAIN_GPIO_PORT_J 9 +#define TEGRA241_MAIN_GPIO_PORT_K 10 +#define TEGRA241_MAIN_GPIO_PORT_L 11 + +#define TEGRA241_MAIN_GPIO(port, offset) \ + ((TEGRA241_MAIN_GPIO_PORT_##port * 8) + (offset)) + +/* GPIOs implemented by AON GPIO controller */ +#define TEGRA241_AON_GPIO_PORT_AA 0 +#define TEGRA241_AON_GPIO_PORT_BB 1 + +#define TEGRA241_AON_GPIO(port, offset) \ + ((TEGRA241_AON_GPIO_PORT_##port * 8) + (offset)) + +#endif -- cgit v1.2.3 From dd123e62bdedcd3a486e48e883ec63138ec2c14c Mon Sep 17 00:00:00 2001 From: Henning Schild Date: Mon, 13 Dec 2021 13:04:59 +0100 Subject: platform/x86: simatic-ipc: add main driver for Siemens devices This mainly implements detection of these devices and will allow secondary drivers to work on such machines. The identification is DMI-based with a vendor specific way to tell them apart in a reliable way. Drivers for LEDs and Watchdogs will follow to make use of that platform detection. There is also some code to allow secondary drivers to find GPIO memory, that needs to be in place because the pinctrl drivers do not come up. Signed-off-by: Henning Schild Link: https://lore.kernel.org/r/20211213120502.20661-2-henning.schild@siemens.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/simatic-ipc-base.h | 29 +++++++++ include/linux/platform_data/x86/simatic-ipc.h | 72 ++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 include/linux/platform_data/x86/simatic-ipc-base.h create mode 100644 include/linux/platform_data/x86/simatic-ipc.h (limited to 'include') diff --git a/include/linux/platform_data/x86/simatic-ipc-base.h b/include/linux/platform_data/x86/simatic-ipc-base.h new file mode 100644 index 000000000000..62d2bc774067 --- /dev/null +++ b/include/linux/platform_data/x86/simatic-ipc-base.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Siemens SIMATIC IPC drivers + * + * Copyright (c) Siemens AG, 2018-2021 + * + * Authors: + * Henning Schild + * Gerd Haeussler + */ + +#ifndef __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H +#define __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H + +#include + +#define SIMATIC_IPC_DEVICE_NONE 0 +#define SIMATIC_IPC_DEVICE_227D 1 +#define SIMATIC_IPC_DEVICE_427E 2 +#define SIMATIC_IPC_DEVICE_127E 3 +#define SIMATIC_IPC_DEVICE_227E 4 + +struct simatic_ipc_platform { + u8 devmode; +}; + +u32 simatic_ipc_get_membase0(unsigned int p2sb); + +#endif /* __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H */ diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h new file mode 100644 index 000000000000..f3b76b39776b --- /dev/null +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Siemens SIMATIC IPC drivers + * + * Copyright (c) Siemens AG, 2018-2021 + * + * Authors: + * Henning Schild + * Gerd Haeussler + */ + +#ifndef __PLATFORM_DATA_X86_SIMATIC_IPC_H +#define __PLATFORM_DATA_X86_SIMATIC_IPC_H + +#include +#include + +#define SIMATIC_IPC_DMI_ENTRY_OEM 129 +/* binary type */ +#define SIMATIC_IPC_DMI_TYPE 0xff +#define SIMATIC_IPC_DMI_GROUP 0x05 +#define SIMATIC_IPC_DMI_ENTRY 0x02 +#define SIMATIC_IPC_DMI_TID 0x02 + +enum simatic_ipc_station_ids { + SIMATIC_IPC_INVALID_STATION_ID = 0, + SIMATIC_IPC_IPC227D = 0x00000501, + SIMATIC_IPC_IPC427D = 0x00000701, + SIMATIC_IPC_IPC227E = 0x00000901, + SIMATIC_IPC_IPC277E = 0x00000902, + SIMATIC_IPC_IPC427E = 0x00000A01, + SIMATIC_IPC_IPC477E = 0x00000A02, + SIMATIC_IPC_IPC127E = 0x00000D01, +}; + +static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) +{ + struct { + u8 type; /* type (0xff = binary) */ + u8 len; /* len of data entry */ + u8 group; + u8 entry; + u8 tid; + __le32 station_id; /* station id (LE) */ + } __packed * data_entry = (void *)data + sizeof(struct dmi_header); + + while ((u8 *)data_entry < data + max_len) { + if (data_entry->type == SIMATIC_IPC_DMI_TYPE && + data_entry->len == sizeof(*data_entry) && + data_entry->group == SIMATIC_IPC_DMI_GROUP && + data_entry->entry == SIMATIC_IPC_DMI_ENTRY && + data_entry->tid == SIMATIC_IPC_DMI_TID) { + return le32_to_cpu(data_entry->station_id); + } + data_entry = (void *)((u8 *)(data_entry) + data_entry->len); + } + + return SIMATIC_IPC_INVALID_STATION_ID; +} + +static inline void +simatic_ipc_find_dmi_entry_helper(const struct dmi_header *dh, void *_data) +{ + u32 *id = _data; + + if (dh->type != SIMATIC_IPC_DMI_ENTRY_OEM) + return; + + *id = simatic_ipc_get_station_id((u8 *)dh, dh->length); +} + +#endif /* __PLATFORM_DATA_X86_SIMATIC_IPC_H */ -- cgit v1.2.3 From 3d4641a42ccf1593b3f3a474ee7541727acbb8e0 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 14 Dec 2021 15:20:46 +0100 Subject: ASoC: core: Add snd_soc_of_parse_pin_switches() from simple-card-utils The ASoC core already has several helpers to parse card properties from the device tree. Move the parsing code for "pin-switches" from simple-card-utils to a shared snd_soc_of_parse_pin_switches() function so other drivers can also use it to set up pin switches configured in the device tree. Cc: Paul Cercueil Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20211214142049.20422-2-stephan@gerhold.net Signed-off-by: Mark Brown --- include/sound/soc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 5872a8864f3b..7a1650b303f1 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1211,6 +1211,7 @@ int snd_soc_of_parse_card_name(struct snd_soc_card *card, const char *propname); int snd_soc_of_parse_audio_simple_widgets(struct snd_soc_card *card, const char *propname); +int snd_soc_of_parse_pin_switches(struct snd_soc_card *card, const char *prop); int snd_soc_of_get_slot_mask(struct device_node *np, const char *prop_name, unsigned int *mask); -- cgit v1.2.3 From b86947b52f0d0e5b6e6f0510933ca13aad266e47 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 24 Dec 2021 10:10:29 +0800 Subject: ASoC/soundwire: intel: simplify callbacks for params/hw_free We don't really need to pass a substream to the callback, we only need the direction. No functionality change, only simplification to enable improve suspend with paused streams. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Acked-By: Vinod Koul Link: https://lore.kernel.org/r/20211224021034.26635-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 8a463b8fc12a..67e0d3e750b5 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -92,7 +92,7 @@ * firmware. */ struct sdw_intel_stream_params_data { - struct snd_pcm_substream *substream; + int stream; struct snd_soc_dai *dai; struct snd_pcm_hw_params *hw_params; int link_id; @@ -105,7 +105,7 @@ struct sdw_intel_stream_params_data { * firmware. */ struct sdw_intel_stream_free_data { - struct snd_pcm_substream *substream; + int stream; struct snd_soc_dai *dai; int link_id; }; -- cgit v1.2.3 From e8444560b4d9302a511f0996f4cfdf85b628f4ca Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 24 Dec 2021 10:10:31 +0800 Subject: ASoC/SoundWire: dai: expand 'stream' concept beyond SoundWire The HDAudio ASoC support relies on the set_tdm_slots() helper to store the HDaudio stream tag in the tx_mask. This only works because of the pre-existing order in soc-pcm.c, where the hw_params() is handled for codec_dais *before* cpu_dais. When the order is reversed, the stream_tag is used as a mask in the codec fixup functions: /* fixup params based on TDM slot masks */ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && codec_dai->tx_mask) soc_pcm_codec_params_fixup(&codec_params, codec_dai->tx_mask); As a result of this confusion, the codec_params_fixup() ends-up generating bad channel masks, depending on what stream_tag was allocated. We could add a flag to state that the tx_mask is really not a mask, but it would be quite ugly to persist in overloading concepts. Instead, this patch suggests a more generic get/set 'stream' API based on the existing model for SoundWire. We can expand the concept to store 'stream' opaque information that is specific to different DAI types. In the case of HDAudio DAIs, we only need to store a stream tag as an unsigned char pointer. The TDM rx_ and tx_masks should really only be used to store masks. Rename get_sdw_stream/set_sdw_stream callbacks and helpers as get_stream/set_stream. No functionality change beyond the rename. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Acked-By: Vinod Koul Link: https://lore.kernel.org/r/20211224021034.26635-5-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 5d4dd7c5450b..bbd821d2df9c 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -295,9 +295,9 @@ struct snd_soc_dai_ops { unsigned int *rx_num, unsigned int *rx_slot); int (*set_tristate)(struct snd_soc_dai *dai, int tristate); - int (*set_sdw_stream)(struct snd_soc_dai *dai, - void *stream, int direction); - void *(*get_sdw_stream)(struct snd_soc_dai *dai, int direction); + int (*set_stream)(struct snd_soc_dai *dai, + void *stream, int direction); + void *(*get_stream)(struct snd_soc_dai *dai, int direction); /* * DAI digital mute - optional. @@ -515,42 +515,42 @@ static inline void *snd_soc_dai_get_drvdata(struct snd_soc_dai *dai) } /** - * snd_soc_dai_set_sdw_stream() - Configures a DAI for SDW stream operation + * snd_soc_dai_set_stream() - Configures a DAI for stream operation * @dai: DAI - * @stream: STREAM + * @stream: STREAM (opaque structure depending on DAI type) * @direction: Stream direction(Playback/Capture) - * SoundWire subsystem doesn't have a notion of direction and we reuse + * Some subsystems, such as SoundWire, don't have a notion of direction and we reuse * the ASoC stream direction to configure sink/source ports. * Playback maps to source ports and Capture for sink ports. * * This should be invoked with NULL to clear the stream set previously. * Returns 0 on success, a negative error code otherwise. */ -static inline int snd_soc_dai_set_sdw_stream(struct snd_soc_dai *dai, - void *stream, int direction) +static inline int snd_soc_dai_set_stream(struct snd_soc_dai *dai, + void *stream, int direction) { - if (dai->driver->ops->set_sdw_stream) - return dai->driver->ops->set_sdw_stream(dai, stream, direction); + if (dai->driver->ops->set_stream) + return dai->driver->ops->set_stream(dai, stream, direction); else return -ENOTSUPP; } /** - * snd_soc_dai_get_sdw_stream() - Retrieves SDW stream from DAI + * snd_soc_dai_get_stream() - Retrieves stream from DAI * @dai: DAI * @direction: Stream direction(Playback/Capture) * * This routine only retrieves that was previously configured - * with snd_soc_dai_get_sdw_stream() + * with snd_soc_dai_get_stream() * * Returns pointer to stream or an ERR_PTR value, e.g. * ERR_PTR(-ENOTSUPP) if callback is not supported; */ -static inline void *snd_soc_dai_get_sdw_stream(struct snd_soc_dai *dai, - int direction) +static inline void *snd_soc_dai_get_stream(struct snd_soc_dai *dai, + int direction) { - if (dai->driver->ops->get_sdw_stream) - return dai->driver->ops->get_sdw_stream(dai, direction); + if (dai->driver->ops->get_stream) + return dai->driver->ops->get_stream(dai, direction); else return ERR_PTR(-ENOTSUPP); } -- cgit v1.2.3 From 54bf7fa3efd08eea03e4bac04e188ee3db6173a7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 13 Dec 2021 17:11:45 +0100 Subject: ima: Fix undefined arch_ima_get_secureboot() and co Currently arch_ima_get_secureboot() and arch_get_ima_policy() are defined only when CONFIG_IMA is set, and this makes any code calling those functions without CONFIG_IMA fail. Move the declaration and the dummy definition of those functions outside ifdef-CONFIG_IMA block for fixing the undefined symbols. Signed-off-by: Takashi Iwai [zohar@linux.ibm.com: removed in-tree/out-of-tree comment in patch description] Reviewed-by: Petr Vorel Signed-off-by: Mimi Zohar --- include/linux/ima.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ima.h b/include/linux/ima.h index b6ab66a546ae..426b1744215e 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -50,21 +50,6 @@ static inline void ima_appraise_parse_cmdline(void) {} extern void ima_add_kexec_buffer(struct kimage *image); #endif -#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT -extern bool arch_ima_get_secureboot(void); -extern const char * const *arch_get_ima_policy(void); -#else -static inline bool arch_ima_get_secureboot(void) -{ - return false; -} - -static inline const char * const *arch_get_ima_policy(void) -{ - return NULL; -} -#endif - #else static inline enum hash_algo ima_get_current_hash_algo(void) { @@ -155,6 +140,21 @@ static inline int ima_measure_critical_data(const char *event_label, #endif /* CONFIG_IMA */ +#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT +extern bool arch_ima_get_secureboot(void); +extern const char * const *arch_get_ima_policy(void); +#else +static inline bool arch_ima_get_secureboot(void) +{ + return false; +} + +static inline const char * const *arch_get_ima_policy(void) +{ + return NULL; +} +#endif + #ifndef CONFIG_IMA_KEXEC struct kimage; -- cgit v1.2.3 From 12054f0ce8be7d2003ec068ab27c9eb608397b98 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 16 Dec 2021 17:11:27 -0600 Subject: ALSA/ASoC: hda: move/rename snd_hdac_ext_stop_streams to hdac_stream.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snd_hdac_ext_stop_streams() has really nothing to do with the extension, it just loops over the bus streams. Move it to the hdac_stream layer and rename to remove the 'ext' prefix and add the precision that the chip will also be stopped. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/20211216231128.344321-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- include/sound/hdaudio.h | 1 + include/sound/hdaudio_ext.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index 22af68b01426..6a90ce405e60 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -558,6 +558,7 @@ int snd_hdac_stream_set_params(struct hdac_stream *azx_dev, void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start); void snd_hdac_stream_clear(struct hdac_stream *azx_dev); void snd_hdac_stream_stop(struct hdac_stream *azx_dev); +void snd_hdac_stop_streams_and_chip(struct hdac_bus *bus); void snd_hdac_stream_reset(struct hdac_stream *azx_dev); void snd_hdac_stream_sync_trigger(struct hdac_stream *azx_dev, bool set, unsigned int streams, unsigned int reg); diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index d4e31ea16aba..56ea5cde5e63 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -92,7 +92,6 @@ void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, struct hdac_ext_stream *azx_dev, bool decouple); void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, struct hdac_ext_stream *azx_dev, bool decouple); -void snd_hdac_ext_stop_streams(struct hdac_bus *bus); int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus, struct hdac_ext_stream *stream, u32 value); -- cgit v1.2.3 From 0f7e5ee62f4c24ca9db58351c86653cc3ee0bd0e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 16 Dec 2021 17:11:28 -0600 Subject: ALSA: HDA: hdac_ext_stream: use consistent prefixes for variables The existing code maximizes confusion by using 'stream' and 'hstream' variables of different types. Examples: struct hdac_stream *stream; struct hdac_ext_stream *stream; struct hdac_stream *hstream; struct hdac_ext_stream *hstream; with some additional copy/paste remains: struct hdac_ext_stream *azx_dev; This patch suggests a consistent naming across all 'hdac_ext_stream' functions. The convention is: struct hdac_stream *hstream; struct hdac_ext_stream *hext_stream; No functionality change - just renaming of variables and more consistent indentation. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Rander Wang Link: https://lore.kernel.org/r/20211216231128.344321-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- include/sound/hdaudio_ext.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 56ea5cde5e63..77123c3e4095 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -78,35 +78,35 @@ struct hdac_ext_stream { container_of(s, struct hdac_ext_stream, hstream) void snd_hdac_ext_stream_init(struct hdac_bus *bus, - struct hdac_ext_stream *stream, int idx, - int direction, int tag); + struct hdac_ext_stream *hext_stream, int idx, + int direction, int tag); int snd_hdac_ext_stream_init_all(struct hdac_bus *bus, int start_idx, - int num_stream, int dir); + int num_stream, int dir); void snd_hdac_stream_free_all(struct hdac_bus *bus); void snd_hdac_link_free_all(struct hdac_bus *bus); struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus, struct snd_pcm_substream *substream, int type); -void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type); +void snd_hdac_ext_stream_release(struct hdac_ext_stream *hext_stream, int type); void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, - struct hdac_ext_stream *azx_dev, bool decouple); + struct hdac_ext_stream *hext_stream, bool decouple); void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, struct hdac_ext_stream *azx_dev, bool decouple); int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus, - struct hdac_ext_stream *stream, u32 value); + struct hdac_ext_stream *hext_stream, u32 value); int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_bus *bus, - struct hdac_ext_stream *stream); + struct hdac_ext_stream *hext_stream); void snd_hdac_ext_stream_drsm_enable(struct hdac_bus *bus, bool enable, int index); int snd_hdac_ext_stream_set_dpibr(struct hdac_bus *bus, - struct hdac_ext_stream *stream, u32 value); -int snd_hdac_ext_stream_set_lpib(struct hdac_ext_stream *stream, u32 value); + struct hdac_ext_stream *hext_stream, u32 value); +int snd_hdac_ext_stream_set_lpib(struct hdac_ext_stream *hext_stream, u32 value); -void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *hstream); -void snd_hdac_ext_link_stream_clear(struct hdac_ext_stream *hstream); -void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *hstream); -int snd_hdac_ext_link_stream_setup(struct hdac_ext_stream *stream, int fmt); +void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *hext_stream); +void snd_hdac_ext_link_stream_clear(struct hdac_ext_stream *hext_stream); +void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *hext_stream); +int snd_hdac_ext_link_stream_setup(struct hdac_ext_stream *hext_stream, int fmt); struct hdac_ext_link { struct hdac_bus *bus; -- cgit v1.2.3 From 5ec7d18d1813a5bead0b495045606c93873aecbb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 23 Dec 2021 13:04:30 -0500 Subject: sctp: use call_rcu to free endpoint This patch is to delay the endpoint free by calling call_rcu() to fix another use-after-free issue in sctp_sock_dump(): BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20 Call Trace: __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218 lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline] _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168 spin_lock_bh include/linux/spinlock.h:334 [inline] __lock_sock+0x203/0x350 net/core/sock.c:2253 lock_sock_nested+0xfe/0x120 net/core/sock.c:2774 lock_sock include/net/sock.h:1492 [inline] sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324 sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091 sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527 __inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049 inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065 netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274 This issue occurs when asoc is peeled off and the old sk is freed after getting it by asoc->base.sk and before calling lock_sock(sk). To prevent the sk free, as a holder of the sk, ep should be alive when calling lock_sock(). This patch uses call_rcu() and moves sock_put and ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to hold the ep under rcu_read_lock in sctp_transport_traverse_process(). If sctp_endpoint_hold() returns true, it means this ep is still alive and we have held it and can continue to dump it; If it returns false, it means this ep is dead and can be freed after rcu_read_unlock, and we should skip it. In sctp_sock_dump(), after locking the sk, if this ep is different from tsp->asoc->ep, it means during this dumping, this asoc was peeled off before calling lock_sock(), and the sk should be skipped; If this ep is the same with tsp->asoc->ep, it means no peeloff happens on this asoc, and due to lock_sock, no peeloff will happen either until release_sock. Note that delaying endpoint free won't delay the port release, as the port release happens in sctp_endpoint_destroy() before calling call_rcu(). Also, freeing endpoint by call_rcu() makes it safe to access the sk by asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv(). Thanks Jones to bring this issue up. v1->v2: - improve the changelog. - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed. Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com Reported-by: Lee Jones Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 6 +++--- include/net/sctp/structs.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 189fdb9db162..d314a180ab93 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -105,6 +105,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, @@ -115,9 +116,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 899c29c326ba..8dabd8800006 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1355,6 +1355,7 @@ struct sctp_endpoint { reconf_enable:1; __u8 strreset_enable; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1370,7 +1371,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, -- cgit v1.2.3 From 94ab10dd42a70acc5208a41325617e3d9cf81a70 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 24 Dec 2021 21:12:48 -0800 Subject: mm: delete unsafe BUG from page_cache_add_speculative() It is not easily reproducible, but on 5.16-rc I have several times hit the VM_BUG_ON_PAGE(PageTail(page), page) in page_cache_add_speculative(): usually from filemap_get_read_batch() for an ext4 read, yesterday from next_uptodate_page() from filemap_map_pages() for a shmem fault. That BUG used to be placed where page_ref_add_unless() had succeeded, but now it is placed before folio_ref_add_unless() is attempted: that is not safe, since it is only the acquired reference which makes the page safe from racing THP collapse or split. We could keep the BUG, checking PageTail only when folio_ref_try_add_rcu() has succeeded; but I don't think it adds much value - just delete it. Link: https://lkml.kernel.org/r/8b98fc6f-3439-8614-c3f3-945c659a1aba@google.com Fixes: 020853b6f5ea ("mm: Add folio_try_get_rcu()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Reviewed-by: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: William Kucharski Cc: Christoph Hellwig Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 605246452305..d150a9082b31 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -285,7 +285,6 @@ static inline struct inode *folio_inode(struct folio *folio) static inline bool page_cache_add_speculative(struct page *page, int count) { - VM_BUG_ON_PAGE(PageTail(page), page); return folio_ref_try_add_rcu((struct folio *)page, count); } -- cgit v1.2.3 From 595ec1973c276f6c0c1de8aca5eef8dfd81f9b49 Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 24 Dec 2021 21:12:51 -0800 Subject: mm/page_alloc: fix __alloc_size attribute for alloc_pages_exact_nid The second parameter of alloc_pages_exact_nid is the one indicating the size of memory pointed by the returned pointer. Link: https://lkml.kernel.org/r/YbjEgwhn4bGblp//@coeus Fixes: abd58f38dfb4 ("mm/page_alloc: add __alloc_size attributes for better bounds checking") Signed-off-by: Thibaut Sautereau Acked-by: Kees Cook Cc: Daniel Micay Cc: Levente Polyak Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b976c4177299..8fcc38467af6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -624,7 +624,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1); void free_pages_exact(void *virt, size_t size); -__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(1); +__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) -- cgit v1.2.3 From 48f31169830f589e4c7ac475ccc7414951ded3f0 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Thu, 14 Oct 2021 22:38:41 +0300 Subject: habanalabs: change wait for interrupt timeout to 64 bit In order to increase maximum wait-for-interrupt timeout, change it to 64 bit variable. This wait is used only by newer ASICs, so no problem in changing this interface at this time. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 00b309590499..c5760acebdd1 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -911,14 +911,18 @@ struct hl_wait_cs_in { */ __u32 flags; - /* Multi CS API info- valid entries in multi-CS array */ - __u8 seq_arr_len; - __u8 pad[3]; + union { + struct { + /* Multi CS API info- valid entries in multi-CS array */ + __u8 seq_arr_len; + __u8 pad[7]; + }; - /* Absolute timeout to wait for an interrupt in microseconds. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set - */ - __u32 interrupt_timeout_us; + /* Absolute timeout to wait for an interrupt in microseconds. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 interrupt_timeout_us; + }; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 -- cgit v1.2.3 From 1679c7ee580fdaa2a5df398a526b2eddc857f2a1 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Mon, 25 Oct 2021 09:47:04 +0300 Subject: habanalabs: expand clock throttling information uAPI In addition to the clock throttling reason, user should be able to obtain also the start time and the duration of the throttling event. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index c5760acebdd1..257b9630773e 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -473,15 +473,27 @@ struct hl_info_pci_counters { __u64 replay_cnt; }; -#define HL_CLK_THROTTLE_POWER 0x1 -#define HL_CLK_THROTTLE_THERMAL 0x2 +enum hl_clk_throttling_type { + HL_CLK_THROTTLE_TYPE_POWER, + HL_CLK_THROTTLE_TYPE_THERMAL, + HL_CLK_THROTTLE_TYPE_MAX +}; + +/* clk_throttling_reason masks */ +#define HL_CLK_THROTTLE_POWER (1 << HL_CLK_THROTTLE_TYPE_POWER) +#define HL_CLK_THROTTLE_THERMAL (1 << HL_CLK_THROTTLE_TYPE_THERMAL) /** * struct hl_info_clk_throttle - clock throttling reason * @clk_throttling_reason: each bit represents a clk throttling reason + * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the start-event + * @clk_throttling_duration_ns: the clock throttle time in nanosec */ struct hl_info_clk_throttle { __u32 clk_throttling_reason; + __u32 pad; + __u64 clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX]; + __u64 clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX]; }; /** -- cgit v1.2.3 From 49c052dad691ba1a3dc3559b74e99f2ec2fa0319 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Sun, 24 Oct 2021 19:02:32 +0300 Subject: habanalabs: add new opcodes for INFO IOCTL Add implementation for new opcodes in the INFO IOCTL: 1. Retrieve the replaced DRAM rows from f/w. 2. Retrieve the pending DRAM rows from f/w. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 257b9630773e..9b4d72897061 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -334,6 +334,8 @@ enum hl_server_type { * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls + * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info + * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -353,6 +355,8 @@ enum hl_server_type { #define HL_INFO_PLL_FREQUENCY 16 #define HL_INFO_POWER 17 #define HL_INFO_OPEN_STATS 18 +#define HL_INFO_DRAM_REPLACED_ROWS 21 +#define HL_INFO_DRAM_PENDING_ROWS 22 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 -- cgit v1.2.3 From 3e55b5dbf929a40966b8eb7d4de94fad3bb404bd Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Wed, 3 Nov 2021 10:09:59 +0200 Subject: habanalabs: add support for fetching historic errors A new uAPI is added for debug purposes of the user-space to retrieve errors related data from previous session (before device reset was performed). Inforamtion is filled when a razwi or CS timeout happens and can contain one of the following: 1. Retrieve timestamp of last time the device was opened and razwi or CS timeout happened. 2. Retrieve information about last CS timeout. 3. Retrieve information about last razwi error. This information doesn't contain user data, so no danger of data leakage between users. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 58 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 9b4d72897061..eb8565fdae70 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -336,6 +336,14 @@ enum hl_server_type { * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num + * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the device was opened + * and CS timeout or razwi error occurred. + * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS sequence number. + * HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi: + * Timestamp of razwi. + * The address which accessing it caused the razwi. + * Razwi initiator. + * Razwi cause, was it a page fault or MMU access error. */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -357,8 +365,11 @@ enum hl_server_type { #define HL_INFO_OPEN_STATS 18 #define HL_INFO_DRAM_REPLACED_ROWS 21 #define HL_INFO_DRAM_PENDING_ROWS 22 +#define HL_INFO_LAST_ERR_OPEN_DEV_TIME 23 +#define HL_INFO_CS_TIMEOUT_EVENT 24 +#define HL_INFO_RAZWI_EVENT 25 -#define HL_INFO_VERSION_MAX_LEN 128 +#define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 /** @@ -575,6 +586,51 @@ struct hl_info_cs_counters { __u64 ctx_validation_drop_cnt; }; +/** + * struct hl_info_last_err_open_dev_time - last error boot information. + * @timestamp: timestamp of last time the device was opened and error occurred. + */ +struct hl_info_last_err_open_dev_time { + __s64 timestamp; +}; + +/** + * struct hl_info_cs_timeout_event - last CS timeout information. + * @timestamp: timestamp when last CS timeout event occurred. + * @seq: sequence number of last CS timeout event. + */ +struct hl_info_cs_timeout_event { + __s64 timestamp; + __u64 seq; +}; + +#define HL_RAZWI_PAGE_FAULT 0 +#define HL_RAZWI_MMU_ACCESS_ERROR 1 + +/** + * struct hl_info_razwi_event - razwi information. + * @timestamp: timestamp of razwi. + * @addr: address which accessing it caused razwi. + * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does not + * have engine id it will be set to U16_MAX. + * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible + * engines which one them caused the razwi. In that case, it will contain the + * second possible engine id, otherwise it will be set to U16_MAX. + * @no_engine_id: if razwi initiator does not have engine id, this field will be set to 1, + * otherwise 0. + * @error_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. + * @pad: padding to 64 bit. + */ +struct hl_info_razwi_event { + __s64 timestamp; + __u64 addr; + __u16 engine_id_1; + __u16 engine_id_2; + __u8 no_engine_id; + __u8 error_type; + __u8 pad[2]; +}; + enum gaudi_dcores { HL_GAUDI_WS_DCORE, HL_GAUDI_WN_DCORE, -- cgit v1.2.3 From 75a5c44d143bc1818e8004a8bee6993aba3a75cf Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 18 Nov 2021 10:44:05 +0200 Subject: habanalabs: add power information type to POWER_GET packet In new f/w versions, it is required to explicitly indicate the power information type when querying the F/W for power info. When getting the current power level it should be set to power_input. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index eb8565fdae70..cd86937c572d 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -333,6 +333,7 @@ enum hl_server_type { * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency + * HL_INFO_POWER - Retrieve power information * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num -- cgit v1.2.3 From 1880f7acd7e0edacbd46385036253801ddc4273f Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Tue, 9 Nov 2021 11:33:28 +0200 Subject: habanalabs: add SOB information to signal submission uAPI For debug purpose, add SOB address and SOB initial counter value before current submission to uAPI output. Using SOB address and initial counter, user can calculate how much of the submmision has been completed. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index cd86937c572d..648850b954a3 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -929,9 +929,17 @@ struct hl_cs_out { /* * SOB base address offset - * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set + * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or HL_CS_FLAGS_SIGNAL is set */ __u32 sob_base_addr_offset; + + /* + * Count of completed signals in SOB before current signal submission. + * Valid only when (HL_CS_FLAGS_ENCAP_SIGNALS & HL_CS_FLAGS_STAGED_SUBMISSION) + * or HL_CS_FLAGS_SIGNAL is set + */ + __u16 sob_count_before_submission; + __u16 pad[3]; }; union hl_cs_args { -- cgit v1.2.3 From b9d31cada7d9f137028c11534fff77fec8511690 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Tue, 2 Nov 2021 11:34:18 +0200 Subject: habanalabs: change wait_for_interrupt implementation Currently the cq counters are allocated in userspace memory, and mapped by the driver to the device address space. A new requirement that is part of new future API related to this one, requires that cq counters will be allocated in kernel memory. We leverage the existing cb_create API with KERNEL_MAPPED flag set to allocate this memory. That way we gain two things: 1. The memory cannot be freed while in use since it's protected by refcount in driver. 2. No need to wake up the user thread upon each interrupt from CQ, because the kernel has direct access to the counter. Therefore, it can make comparison with the target value in the interrupt handler and wake up the user thread only if the counter reaches the target value. This is instead of waking the thread up to copy counter value from user then go sleep again if target value wasn't reached. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 61 ++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 648850b954a3..371dfc4243b3 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -680,7 +680,10 @@ struct hl_info_args { #define HL_MAX_CB_SIZE (0x200000 - 32) /* Indicates whether the command buffer should be mapped to the device's MMU */ -#define HL_CB_FLAGS_MAP 0x1 +#define HL_CB_FLAGS_MAP 0x1 + +/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */ +#define HL_CB_FLAGS_GET_DEVICE_VA 0x2 struct hl_cb_in { /* Handle of CB or 0 if we want to create one */ @@ -702,11 +705,16 @@ struct hl_cb_out { /* Handle of CB */ __u64 cb_handle; - /* Information about CB */ - struct { - /* Usage count of CB */ - __u32 usage_cnt; - __u32 pad; + union { + /* Information about CB */ + struct { + /* Usage count of CB */ + __u32 usage_cnt; + __u32 pad; + }; + + /* CB mapped address to device MMU */ + __u64 device_va; }; }; }; @@ -947,9 +955,10 @@ union hl_cs_args { struct hl_cs_out out; }; -#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 -#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 -#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 +#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 +#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 +#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 @@ -969,14 +978,23 @@ struct hl_wait_cs_in { }; struct { - /* User address for completion comparison. - * upon interrupt, driver will compare the value pointed - * by this address with the supplied target value. - * in order not to perform any comparison, set address - * to all 1s. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set - */ - __u64 addr; + union { + /* User address for completion comparison. + * upon interrupt, driver will compare the value pointed + * by this address with the supplied target value. + * in order not to perform any comparison, set address + * to all 1s. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 addr; + + /* cq_counters_handle to a kernel mapped cb which contains + * cq counters. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_handle; + }; + /* Target value for completion comparison */ __u64 target; }; @@ -1004,6 +1022,15 @@ struct hl_wait_cs_in { */ __u64 interrupt_timeout_us; }; + + /* + * cq counter offset inside the counters cb pointed by cq_counters_handle above. + * upon interrupt, driver will compare the value pointed + * by this address (cq_counters_handle + cq_counters_offset) + * with the supplied target value. + * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_offset; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 -- cgit v1.2.3 From 4fb0abfee424b05f0ec6d2d09e38f04ee2b82a8a Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 8 Nov 2021 15:51:21 -0600 Subject: x86/amd_nb: Add AMD Family 19h Models (10h-1Fh) and (A0h-AFh) PCI IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the new PCI Device IDs to support new generation of AMD 19h family of processors. Signed-off-by: Yazen Ghannam Signed-off-by: Babu Moger Acked-by: Krzysztof Wilczyński Acked-by: Borislav Petkov Acked-by: Bjorn Helgaas # pci_ids.h Link: https://lore.kernel.org/r/163640828133.955062.18349019796157170473.stgit@bmoger-ubuntu Signed-off-by: Guenter Roeck --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 011f2f1ea5bb..b5248f27910e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -555,6 +555,7 @@ #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 #define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653 +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F3 0x14b0 #define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 -- cgit v1.2.3 From 11a24ca7e34d968991a7d437b950d1924396bd81 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 25 Nov 2021 03:08:38 +0100 Subject: hwmon: (ntc_thermistor) Merge platform data into driver Platform data is supposed to be used with "board files", device descriptions in C. Since the introduction of the NTC driver in 2011, no such platforms have been submitted to the Linux kernel, and their use is strongly discouraged in favor of Device Tree, ACPI or as last resort software firmware nodes. Drop the external header and copy the platform data into the driver file. Cc: Peter Rosin Cc: Chris Lesiak Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20211125020841.3616359-2-linus.walleij@linaro.org Signed-off-by: Guenter Roeck --- include/linux/platform_data/ntc_thermistor.h | 50 ---------------------------- 1 file changed, 50 deletions(-) delete mode 100644 include/linux/platform_data/ntc_thermistor.h (limited to 'include') diff --git a/include/linux/platform_data/ntc_thermistor.h b/include/linux/platform_data/ntc_thermistor.h deleted file mode 100644 index b324d03e580c..000000000000 --- a/include/linux/platform_data/ntc_thermistor.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * ntc_thermistor.h - NTC Thermistors - * - * Copyright (C) 2010 Samsung Electronics - * MyungJoo Ham - */ -#ifndef _LINUX_NTC_H -#define _LINUX_NTC_H - -struct iio_channel; - -enum ntc_thermistor_type { - TYPE_B57330V2103, - TYPE_B57891S0103, - TYPE_NCPXXWB473, - TYPE_NCPXXWF104, - TYPE_NCPXXWL333, - TYPE_NCPXXXH103, -}; - -struct ntc_thermistor_platform_data { - /* - * One (not both) of read_uV and read_ohm should be provided and only - * one of the two should be provided. - * Both functions should return negative value for an error case. - * - * pullup_uV, pullup_ohm, pulldown_ohm, and connect are required to use - * read_uV() - * - * How to setup pullup_ohm, pulldown_ohm, and connect is - * described at Documentation/hwmon/ntc_thermistor.rst - * - * pullup/down_ohm: 0 for infinite / not-connected - * - * chan: iio_channel pointer to communicate with the ADC which the - * thermistor is using for conversion of the analog values. - */ - int (*read_uv)(struct ntc_thermistor_platform_data *); - unsigned int pullup_uv; - - unsigned int pullup_ohm; - unsigned int pulldown_ohm; - enum { NTC_CONNECTED_POSITIVE, NTC_CONNECTED_GROUND } connect; - struct iio_channel *chan; - - int (*read_ohm)(void); -}; - -#endif /* _LINUX_NTC_H */ -- cgit v1.2.3 From 130d168866a11829b844ffdb19b9aefe384f754c Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 16 Dec 2021 16:42:57 +0100 Subject: hwmon: prefix kernel-doc comments for structs with struct The command ./scripts/kernel-doc -none include/linux/hwmon.h warns: include/linux/hwmon.h:406: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Channel information include/linux/hwmon.h:425: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Chip configuration Address those kernel-doc warnings by prefixing kernel-doc descriptions for structs with the keyword 'struct'. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20211216154257.26758-1-lukas.bulwahn@gmail.com Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 1e8d6ea8992e..fad1f1df26df 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -403,7 +403,7 @@ struct hwmon_ops { }; /** - * Channel information + * struct hwmon_channel_info - Channel information * @type: Channel type. * @config: Pointer to NULL-terminated list of channel parameters. * Use for per-channel attributes. @@ -422,7 +422,7 @@ struct hwmon_channel_info { }) /** - * Chip configuration + * struct hwmon_chip_info - Chip configuration * @ops: Pointer to hwmon operations. * @info: Null-terminated list of channel information. */ -- cgit v1.2.3 From ee6d3dd4ed48ab24b74bab3c3977b8218518247d Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Fri, 24 Dec 2021 23:13:45 +0000 Subject: driver core: make kobj_type constant. This way instances of kobj_type (which contain function pointers) can be stored in .rodata, which means that they cannot be [easily/accidentally] modified at runtime. Signed-off-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20211224231345.777370-1-wedsonaf@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c740062b4b1a..683172b2e094 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -66,7 +66,7 @@ struct kobject { struct list_head entry; struct kobject *parent; struct kset *kset; - struct kobj_type *ktype; + const struct kobj_type *ktype; struct kernfs_node *sd; /* sysfs directory entry */ struct kref kref; #ifdef CONFIG_DEBUG_KOBJECT_RELEASE @@ -90,13 +90,13 @@ static inline const char *kobject_name(const struct kobject *kobj) return kobj->name; } -extern void kobject_init(struct kobject *kobj, struct kobj_type *ktype); +extern void kobject_init(struct kobject *kobj, const struct kobj_type *ktype); extern __printf(3, 4) __must_check int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...); extern __printf(4, 5) __must_check int kobject_init_and_add(struct kobject *kobj, - struct kobj_type *ktype, struct kobject *parent, + const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...); extern void kobject_del(struct kobject *kobj); @@ -217,7 +217,7 @@ static inline void kset_put(struct kset *k) kobject_put(&k->kobj); } -static inline struct kobj_type *get_ktype(struct kobject *kobj) +static inline const struct kobj_type *get_ktype(struct kobject *kobj) { return kobj->ktype; } -- cgit v1.2.3 From 253f06c7b1c1729b50e7ec52638e046239327bb1 Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Thu, 23 Dec 2021 07:01:25 +0100 Subject: dt-bindings: phy: cadence-torrent: Rename SSC macros to use generic names Rename SSC macros to use generic names instead of PHY specific names, so that they can be used to specify SSC modes for both Torrent and Sierra. Renaming the macros should not affect the things as these are not being used in any DTS file yet. Signed-off-by: Swapnil Jakhade Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211223060137.9252-4-sjakhade@cadence.com Signed-off-by: Vinod Koul --- include/dt-bindings/phy/phy-cadence.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/phy/phy-cadence.h b/include/dt-bindings/phy/phy-cadence.h index 24fdc9e11bd6..d55fe6e6b936 100644 --- a/include/dt-bindings/phy/phy-cadence.h +++ b/include/dt-bindings/phy/phy-cadence.h @@ -6,11 +6,11 @@ #ifndef _DT_BINDINGS_CADENCE_SERDES_H #define _DT_BINDINGS_CADENCE_SERDES_H -/* Torrent */ -#define TORRENT_SERDES_NO_SSC 0 -#define TORRENT_SERDES_EXTERNAL_SSC 1 -#define TORRENT_SERDES_INTERNAL_SSC 2 +#define CDNS_SERDES_NO_SSC 0 +#define CDNS_SERDES_EXTERNAL_SSC 1 +#define CDNS_SERDES_INTERNAL_SSC 2 +/* Torrent */ #define CDNS_TORRENT_REFCLK_DRIVER 0 #define CDNS_TORRENT_DERIVED_REFCLK 1 #define CDNS_TORRENT_RECEIVED_REFCLK 2 -- cgit v1.2.3 From 637feefb8ac53fbe1147edb707b03dc09839fdf5 Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Thu, 23 Dec 2021 07:01:36 +0100 Subject: dt-bindings: phy: cadence-sierra: Add clock ID for derived reference clock Add clock ID for Sierra derived reference clock. Signed-off-by: Swapnil Jakhade Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211223060137.9252-15-sjakhade@cadence.com Signed-off-by: Vinod Koul --- include/dt-bindings/phy/phy-cadence.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/phy/phy-cadence.h b/include/dt-bindings/phy/phy-cadence.h index d55fe6e6b936..0671991208fc 100644 --- a/include/dt-bindings/phy/phy-cadence.h +++ b/include/dt-bindings/phy/phy-cadence.h @@ -18,5 +18,6 @@ /* Sierra */ #define CDNS_SIERRA_PLL_CMNLC 0 #define CDNS_SIERRA_PLL_CMNLC1 1 +#define CDNS_SIERRA_DERIVED_REFCLK 2 #endif /* _DT_BINDINGS_CADENCE_SERDES_H */ -- cgit v1.2.3 From 7175f02c4e5f5a9430113ab9ca0fd0ce98b28a51 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 26 Dec 2021 16:01:27 +0300 Subject: uapi: fix linux/nfc.h userspace compilation errors Replace sa_family_t with __kernel_sa_family_t to fix the following linux/nfc.h userspace compilation errors: /usr/include/linux/nfc.h:266:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; /usr/include/linux/nfc.h:274:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; Fixes: 23b7869c0fd0 ("NFC: add the NFC socket raw protocol") Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Dmitry V. Levin Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index f6e3c8c9c744..aadad43d943a 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -263,7 +263,7 @@ enum nfc_sdp_attr { #define NFC_SE_ENABLED 0x1 struct sockaddr_nfc { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; @@ -271,7 +271,7 @@ struct sockaddr_nfc { #define NFC_LLCP_MAX_SERVICE_NAME 63 struct sockaddr_nfc_llcp { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; -- cgit v1.2.3 From 79b69a83705e621b258ac6d8ae6d3bfdb4b930aa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Dec 2021 13:03:47 +0100 Subject: nfc: uapi: use kernel size_t to fix user-space builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix user-space builds if it includes /usr/include/linux/nfc.h before some of other headers: /usr/include/linux/nfc.h:281:9: error: unknown type name ‘size_t’ 281 | size_t service_name_len; | ^~~~~~ Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index aadad43d943a..4fa4e979e948 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -278,7 +278,7 @@ struct sockaddr_nfc_llcp { __u8 dsap; /* Destination SAP, if known */ __u8 ssap; /* Source SAP to be bound to */ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; - size_t service_name_len; + __kernel_size_t service_name_len; }; /* NFC socket protocols */ -- cgit v1.2.3 From f81bdeaf816142e0729eea0cc84c395ec9673151 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Wed, 22 Dec 2021 16:57:34 +0100 Subject: ACPICA: actypes.h: Expand the ACPI_ACCESS_ definitions ACPICA commit bc02c76d518135531483dfc276ed28b7ee632ce1 The current ACPI_ACCESS_*_WIDTH defines do not provide a way to test that size is small enough to not cause an overflow when applied to a 32-bit integer. Rather than adding more magic numbers, add ACPI_ACCESS_*_SHIFT, ACPI_ACCESS_*_MAX, and ACPI_ACCESS_*_DEFAULT #defines and redefine ACPI_ACCESS_*_WIDTH in terms of the new #defines. This was inititally reported on Linux where a size of 102 in ACPI_ACCESS_BIT_WIDTH caused an overflow error in the SPCR initialization code. Link: https://github.com/acpica/acpica/commit/bc02c76d Signed-off-by: Mark Langsdorf Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index ff8b3c913f21..248242dca28d 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -536,8 +536,14 @@ typedef u64 acpi_integer; * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ -#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) -#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) +#define ACPI_ACCESS_BIT_SHIFT 2 +#define ACPI_ACCESS_BYTE_SHIFT -1 +#define ACPI_ACCESS_BIT_MAX (31 - ACPI_ACCESS_BIT_SHIFT) +#define ACPI_ACCESS_BYTE_MAX (31 - ACPI_ACCESS_BYTE_SHIFT) +#define ACPI_ACCESS_BIT_DEFAULT (8 - ACPI_ACCESS_BIT_SHIFT) +#define ACPI_ACCESS_BYTE_DEFAULT (8 - ACPI_ACCESS_BYTE_SHIFT) +#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BIT_SHIFT)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BYTE_SHIFT)) /******************************************************************************* * -- cgit v1.2.3 From ca25f92b72d25457653dbf2a81f322235804fb05 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Wed, 22 Dec 2021 17:21:25 +0100 Subject: ACPICA: Use original data_table_region pointer for accesses ACPICA commit d9eb82bd7515989f0b29d79deeeb758db4d6529c Currently the pointer to the table is cast to acpi_physical_address and later cast back to a pointer to be dereferenced. Whether or not this is supported is implementation-defined. On CHERI, and thus Arm's experimental Morello prototype architecture, pointers are represented as capabilities, which are unforgeable bounded pointers, providing always-on fine-grained spatial memory safety. This means that any pointer cast to a plain integer will lose all its associated metadata, and when cast back to a pointer it will give a null-derived pointer (one that has the same metadata as null but an address equal to the integer) that will trap on any dereference. As a result, this is an implementation where acpi_physical_address cannot be used as a hack to store real pointers. Thus, add a new field to struct acpi_object_region to store the pointer for table regions, and propagate it to acpi_ex_data_table_space_handler via the region context, to use a more portable implementation that supports CHERI. Link: https://github.com/acpica/acpica/commit/d9eb82bd Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 248242dca28d..700c2449e85a 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -1221,6 +1221,10 @@ struct acpi_mem_space_context { struct acpi_mem_mapping *first_mm; }; +struct acpi_data_table_space_context { + void *pointer; +}; + /* * struct acpi_memory_list is used only if the ACPICA local cache is enabled */ -- cgit v1.2.3 From 5d6e59665d8bb0f8fa4d47726a93326f8ddfb448 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Wed, 22 Dec 2021 17:22:28 +0100 Subject: ACPICA: Use original pointer for virtual origin tables ACPICA commit dfa3feffa8f760b686207d09dc880cd2f26c72af Currently the pointer to the table is cast to acpi_physical_address and later cast back to a pointer to be dereferenced. Whether or not this is supported is implementation-defined. On CHERI, and thus Arm's experimental Morello prototype architecture, pointers are represented as capabilities, which are unforgeable bounded pointers, providing always-on fine-grained spatial memory safety. This means that any pointer cast to a plain integer will lose all its associated metadata, and when cast back to a pointer it will give a null-derived pointer (one that has the same metadata as null but an address equal to the integer) that will trap on any dereference. As a result, this is an implementation where acpi_physical_address cannot be used as a hack to store real pointers. Thus, alter the lifecycle of table descriptors. Internal physical tables keep the current behaviour where only the address is set on install, and the pointer is set on acquire. Virtual tables (internal and external) now store the pointer on initialisation and use that on acquire (which will redundantly set *table_ptr to itself, but changing that is both unnecessary and overly complicated as acpi_tb_acquire_table is called with both a pointer to a variable and a pointer to Table->Pointer itself). This requires propagating the (possible) table pointer everywhere in order to make sure pointers make it through to acpi_tb_acquire_temp_table, which requires a change to the acpi_install_table interface. Instead of taking an ACPI_PHYSADDR_TYPE and a boolean indicating whether it's physical or virtual, it is now split into acpi_install_table (that takes an external virtual table pointer) and acpi_install_physical_table (that takes an ACPI_PHYSADDR_TYPE for an internal physical table address). This also has the benefit of providing a cleaner API. Link: https://github.com/acpica/acpica/commit/dfa3feff Signed-off-by: Bob Moore [ rjw: Adjust the code in tables.c to match interface changes ] Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 73ba13914321..987bb0aa042e 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -454,9 +454,11 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status * ACPI table load/unload interfaces */ ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION - acpi_install_table(acpi_physical_address address, - u8 physical)) + acpi_install_table(struct acpi_table_header *table)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION + acpi_install_physical_table(acpi_physical_address + address)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_load_table(struct acpi_table_header *table, u32 *table_idx)) -- cgit v1.2.3 From 339651be3704f336634d90c000a7778b86e1be99 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Wed, 22 Dec 2021 17:23:28 +0100 Subject: ACPICA: Macros: Remove ACPI_PHYSADDR_TO_PTR ACPICA commit 52abebd410945ec55afb4dd8b7150e8a39b5c960 This macro was only ever used when stuffing pointers into physical addresses and trying to later reconstruct the pointer, which is implementation-defined as to whether that can be done. Now that all such operations are gone, the macro is unused, and should be removed to avoid such practices being reintroduced. Link: https://github.com/acpica/acpica/commit/52abebd4 Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 700c2449e85a..83a7ee1fbd7c 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -509,7 +509,6 @@ typedef u64 acpi_integer; #define ACPI_TO_POINTER(i) ACPI_CAST_PTR (void, (acpi_size) (i)) #define ACPI_TO_INTEGER(p) ACPI_PTR_DIFF (p, (void *) 0) #define ACPI_OFFSET(d, f) ACPI_PTR_DIFF (&(((d *) 0)->f), (void *) 0) -#define ACPI_PHYSADDR_TO_PTR(i) ACPI_TO_POINTER(i) #define ACPI_PTR_TO_PHYSADDR(i) ACPI_TO_INTEGER(i) /* Optimizations for 4-character (32-bit) acpi_name manipulation */ -- cgit v1.2.3 From e4a07f5acd730802ada629d52a29c4873f9826a4 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 22 Dec 2021 17:25:40 +0100 Subject: ACPICA: iASL/Disassembler: Additional support for NHLT table ACPICA commit 0420852ffc520b81960e877852703b739c16025c Added support for Vendor-defined microphone arrays and SNR (signal-to-noise) extension. Link: https://github.com/acpica/acpica/commit/0420852f Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 72 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 71ca090fd61b..380bff6bb2fd 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1495,12 +1495,10 @@ struct acpi_nhlt_device_specific_config_a { /* Values for Config Type above */ -#define ACPI_NHLT_TYPE_MIC_ARRAY 0x01 -#define ACPI_NHLT_TYPE_GENERIC 0x00 - -/* Mask for Extension field of array_type */ - -#define ACPI_NHLT_ARRAY_TYPE_MASK 0x10 +#define ACPI_NHLT_CONFIG_TYPE_GENERIC 0x00 +#define ACPI_NHLT_CONFIG_TYPE_MIC_ARRAY 0x01 +#define ACPI_NHLT_CONFIG_TYPE_RENDER_FEEDBACK 0x03 +#define ACPI_NHLT_CONFIG_TYPE_RESERVED 0x04 /* 4 and above are reserved */ struct acpi_nhlt_device_specific_config_b { u32 capabilities_size; @@ -1511,6 +1509,11 @@ struct acpi_nhlt_device_specific_config_c { u8 virtual_slot; }; +struct acpi_nhlt_render_device_specific_config { + u32 capabilities_size; + u8 virtual_slot; +}; + struct acpi_nhlt_wave_extensible { u16 format_tag; u16 channel_count; @@ -1573,17 +1576,22 @@ struct acpi_nhlt_mic_device_specific_config { /* Values for array_type_ext above */ -#define SMALL_LINEAR_2ELEMENT 0x0A -#define BIG_LINEAR_2ELEMENT 0x0B -#define FIRST_GEOMETRY_LINEAR_4ELEMENT 0x0C -#define PLANAR_LSHAPED_4ELEMENT 0x0D -#define SECOND_GEOMETRY_LINEAR_4ELEMENT 0x0E -#define VENDOR_DEFINED 0x0F -#define ARRAY_TYPE_MASK 0x0F -#define ARRAY_TYPE_EXT_MASK 0x10 +#define ACPI_NHLT_ARRAY_TYPE_RESERVED 0x09 // 9 and below are reserved +#define ACPI_NHLT_SMALL_LINEAR_2ELEMENT 0x0A +#define ACPI_NHLT_BIG_LINEAR_2ELEMENT 0x0B +#define ACPI_NHLT_FIRST_GEOMETRY_LINEAR_4ELEMENT 0x0C +#define ACPI_NHLT_PLANAR_LSHAPED_4ELEMENT 0x0D +#define ACPI_NHLT_SECOND_GEOMETRY_LINEAR_4ELEMENT 0x0E +#define ACPI_NHLT_VENDOR_DEFINED 0x0F +#define ACPI_NHLT_ARRAY_TYPE_MASK 0x0F +#define ACPI_NHLT_ARRAY_TYPE_EXT_MASK 0x10 + +#define ACPI_NHLT_NO_EXTENSION 0x0 +#define ACPI_NHLT_MIC_SNR_SENSITIVITY_EXT (1<<4) -#define NO_EXTENSION 0x0 -#define MIC_SNR_SENSITIVITY_EXT 0x1 +struct acpi_nhlt_vendor_mic_count { + u8 microphone_count; +}; struct acpi_nhlt_vendor_mic_config { u8 type; @@ -1603,22 +1611,25 @@ struct acpi_nhlt_vendor_mic_config { /* Values for Type field above */ -#define MIC_OMNIDIRECTIONAL 0 -#define MIC_SUBCARDIOID 1 -#define MIC_CARDIOID 2 -#define MIC_SUPER_CARDIOID 3 -#define MIC_HYPER_CARDIOID 4 -#define MIC_8_SHAPED 5 -#define MIC_VENDOR_DEFINED 7 +#define ACPI_NHLT_MIC_OMNIDIRECTIONAL 0 +#define ACPI_NHLT_MIC_SUBCARDIOID 1 +#define ACPI_NHLT_MIC_CARDIOID 2 +#define ACPI_NHLT_MIC_SUPER_CARDIOID 3 +#define ACPI_NHLT_MIC_HYPER_CARDIOID 4 +#define ACPI_NHLT_MIC_8_SHAPED 5 +#define ACPI_NHLT_MIC_RESERVED6 6 // 6 is reserved +#define ACPI_NHLT_MIC_VENDOR_DEFINED 7 +#define ACPI_NHLT_MIC_RESERVED 8 // 8 and above are reserved /* Values for Panel field above */ -#define MIC_TOP 0 -#define MIC_BOTTOM 1 -#define MIC_LEFT 2 -#define MIC_RIGHT 3 -#define MIC_FRONT 4 -#define MIC_REAR 5 +#define ACPI_NHLT_MIC_POSITION_TOP 0 +#define ACPI_NHLT_MIC_POSITION_BOTTOM 1 +#define ACPI_NHLT_MIC_POSITION_LEFT 2 +#define ACPI_NHLT_MIC_POSITION_RIGHT 3 +#define ACPI_NHLT_MIC_POSITION_FRONT 4 +#define ACPI_NHLT_MIC_POSITION_BACK 5 +#define ACPI_NHLT_MIC_POSITION_RESERVED 6 // 6 and above are reserved struct acpi_nhlt_vendor_mic_device_specific_config { struct acpi_nhlt_mic_device_specific_config mic_array_device_config; @@ -1633,8 +1644,9 @@ struct acpi_nhlt_mic_snr_sensitivity_extension { u32 sensitivity; }; +/* Render device with feedback */ + struct acpi_nhlt_render_feedback_device_specific_config { - struct acpi_nhlt_device_specific_config device_config; u8 feedback_virtual_slot; // render slot in case of capture u16 feedback_channels; // informative only u16 feedback_valid_bits_per_sample; -- cgit v1.2.3 From 00395b74d57ff81795ec392627db7e1764c94941 Mon Sep 17 00:00:00 2001 From: Shuuichirou Ishii Date: Wed, 22 Dec 2021 17:28:34 +0100 Subject: ACPICA: Fix AEST Processor generic resource substructure data field byte length ACPICA commit 13b9327761955f6e1e5dbf748b3112940c0dc539 The byte length of the Data field in the AEST Processor generic resource substructure defined in ACPI for the Armv8 RAS Extensions 1.1 is 4Byte. However, it is defined as a pointer type, and on a 64-bit machine, it is interpreted as 8 bytes. Therefore, it is changed from a pointer type unsigned integer 1 byte to an unsigned integer 4 bytes. Link: https://github.com/acpica/acpica/commit/13b93277 Signed-off-by: Shuuichirou Ishii Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 380bff6bb2fd..1b0fac6ffc3f 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -154,7 +154,7 @@ typedef struct acpi_aest_processor_tlb { /* 2R: Processor Generic Resource Substructure */ typedef struct acpi_aest_processor_generic { - u8 *resource; + u32 resource; } acpi_aest_processor_generic; -- cgit v1.2.3 From 0acf24ad7e10f547809faefb8069f8f5482eb4d9 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 22 Dec 2021 17:32:54 +0100 Subject: ACPICA: Add support for PCC Opregion special context data ACPICA commit 55526e8a6133cbf5a9cc0fb75a95dbbac6eb98e6 PCC Opregion added in ACPIC 6.3 requires special context data similar to GPIO and Generic Serial Bus as it needs to know the internal PCC buffer and its length as well as the PCC channel index when the opregion handler is being executed by the OSPM. Lets add support for the special context data needed by PCC Opregion. Link: https://github.com/acpica/acpica/commit/55526e8a Signed-off-by: Sudeep Holla Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 83a7ee1fbd7c..69e89d572b9e 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -1103,6 +1103,14 @@ struct acpi_connection_info { u8 access_length; }; +/* Special Context data for PCC Opregion (ACPI 6.3) */ + +struct acpi_pcc_info { + u8 subspace_id; + u16 length; + u8 *internal_buffer; +}; + typedef acpi_status (*acpi_adr_space_setup) (acpi_handle region_handle, u32 function, -- cgit v1.2.3 From 2de6bb92ebbb0e8803554bb24391514b552cb481 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 22 Dec 2021 17:36:30 +0100 Subject: ACPICA: iASL: Add TDEL table to both compiler/disassembler ACPICA commit 403f9965aba7ff9d2ed5b41bbffdd2a1ed0f596f Added struct acpi_pcc_info to acpi_src. Link: https://github.com/acpica/acpica/commit/403f9965 Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 1b0fac6ffc3f..18b7b9138607 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -48,6 +48,7 @@ #define ACPI_SIG_SDEI "SDEI" /* Software Delegated Exception Interface Table */ #define ACPI_SIG_SDEV "SDEV" /* Secure Devices table */ #define ACPI_SIG_SVKL "SVKL" /* Storage Volume Key Location Table */ +#define ACPI_SIG_TDEL "TDEL" /* TD Event Log Table */ /* * All tables must be byte-packed to match the ACPI specification, since @@ -2467,6 +2468,22 @@ enum acpi_svkl_format { ACPI_SVKL_FORMAT_RESERVED = 1 /* 1 and greater are reserved */ }; +/******************************************************************************* + * + * TDEL - TD-Event Log + * From: "Guest-Host-Communication Interface (GHCI) for Intel + * Trust Domain Extensions (Intel TDX)". + * September 2020 + * + ******************************************************************************/ + +struct acpi_table_tdel { + struct acpi_table_header header; /* Common ACPI table header */ + u32 reserved; + u64 log_area_minimum_length; + u64 log_area_start_address; +}; + /* Reset to default packing */ #pragma pack() -- cgit v1.2.3 From 5579649e7eb756a4e3d5784b6958374e5bfc41de Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 22 Dec 2021 17:37:21 +0100 Subject: ACPICA: iASL: Add suppport for AGDI table ACPICA commit cf36a6d658ca5aa8c329c2edfc3322c095ffd844 Add support for Arm Generic Diagnostic Dump and Reset Interface, which is described by "ACPI for Arm Components 1.1 Platform Design Document" ARM DEN0093. Add the necessary types in the ACPICA header files and support for compiling and decompiling the table. Link: https://github.com/acpica/acpica/commit/cf36a6d6 Signed-off-by: Ilkka Koskinen Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 18b7b9138607..57481f6f1ca5 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -24,6 +24,7 @@ * file. Useful because they make it more difficult to inadvertently type in * the wrong signature. */ +#define ACPI_SIG_AGDI "AGDI" /* Arm Generic Diagnostic Dump and Reset Device Interface */ #define ACPI_SIG_BDAT "BDAT" /* BIOS Data ACPI Table */ #define ACPI_SIG_IORT "IORT" /* IO Remapping Table */ #define ACPI_SIG_IVRS "IVRS" /* I/O Virtualization Reporting Structure */ @@ -238,6 +239,25 @@ typedef struct acpi_aest_node_interrupt { #define ACPI_AEST_NODE_ERROR_RECOVERY 1 #define ACPI_AEST_XRUPT_RESERVED 2 /* 2 and above are reserved */ +/******************************************************************************* + * AGDI - Arm Generic Diagnostic Dump and Reset Device Interface + * + * Conforms to "ACPI for Arm Components 1.1, Platform Design Document" + * ARM DEN0093 v1.1 + * + ******************************************************************************/ +struct acpi_table_agdi { + struct acpi_table_header header; /* Common ACPI table header */ + u8 flags; + u8 reserved[3]; + u32 sdei_event; + u32 gsiv; +}; + +/* Mask for Flags field above */ + +#define ACPI_AGDI_SIGNALING_MODE (1) + /******************************************************************************* * * BDAT - BIOS Data ACPI Table -- cgit v1.2.3 From 0c9a672729d62d27d0c9993e106707be0b0c2bc0 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 22 Dec 2021 17:37:56 +0100 Subject: ACPICA: iASL/NHLT table: "Specific Data" field support ACPICA commit 26f8c721fb01e4a26eec8c85dffcbe950d5e61a9 Add support for optional "Specific Data" field for the optional Linux-specific structure that appears at the end of an Endpoint Descriptor. Link: https://github.com/acpica/acpica/commit/26f8c721 Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 57481f6f1ca5..16847c8d9d5f 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1683,7 +1683,10 @@ struct acpi_nhlt_linux_specific_data { u8 device_id[16]; u8 device_instance_id; u8 device_port_id; - u8 filler[18]; +}; + +struct acpi_nhlt_linux_specific_data_b { + u8 specific_data[18]; }; struct acpi_nhlt_table_terminator { -- cgit v1.2.3 From c95545a036705e1a78b40a83701c8d3868898114 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 22 Dec 2021 17:38:29 +0100 Subject: ACPICA: Update version to 20211217 ACPICA commit 90088defcb99e122edf41038ae5c901206c86dc9 Version 20211217. Link: https://github.com/acpica/acpica/commit/90088def Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 987bb0aa042e..7417731472b7 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20210930 +#define ACPI_CA_VERSION 0x20211217 #include #include -- cgit v1.2.3 From 1882de7fc56c2b0ea91dd9fd9922d434fc3feb15 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 22 Dec 2021 12:31:03 +0800 Subject: efi: Introduce EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER and corresponding structures Platform Firmware Runtime Update image starts with UEFI headers, and the headers are defined in UEFI specification, but some of them have not been defined in the kernel yet. For example, the header layout of a capsule file looks like this: EFI_CAPSULE_HEADER EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER EFI_FIRMWARE_IMAGE_AUTHENTICATION These structures would be used by the Platform Firmware Runtime Update driver to parse the format of capsule file to verify if the corresponding version number is valid. In this way, if the user provides an invalid capsule image, the kernel could be used as a guard to reject it, without switching to the Management Mode (which might be costly). EFI_CAPSULE_HEADER has been defined in the kernel, but the other structures have not been defined yet, so do that. Besides, EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER and EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER are required to be packed in the uefi specification. For this reason, use the __packed attribute to indicate to the compiler that the entire structure can appear misaligned in memory (as suggested by Ard) in case one of them follows the other directly in a capsule header. Acked-by: Ard Biesheuvel Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- include/linux/efi.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index dbd39b20e034..80e970f7e6f8 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -148,6 +148,52 @@ typedef struct { u32 imagesize; } efi_capsule_header_t; +/* EFI_FIRMWARE_MANAGEMENT_CAPSULE_HEADER */ +struct efi_manage_capsule_header { + u32 ver; + u16 emb_drv_cnt; + u16 payload_cnt; + /* + * Variable-size array of the size given by the sum of + * emb_drv_cnt and payload_cnt. + */ + u64 offset_list[]; +} __packed; + +/* EFI_FIRMWARE_MANAGEMENT_CAPSULE_IMAGE_HEADER */ +struct efi_manage_capsule_image_header { + u32 ver; + efi_guid_t image_type_id; + u8 image_index; + u8 reserved_bytes[3]; + u32 image_size; + u32 vendor_code_size; + /* hw_ins was introduced in version 2 */ + u64 hw_ins; + /* capsule_support was introduced in version 3 */ + u64 capsule_support; +} __packed; + +/* WIN_CERTIFICATE */ +struct win_cert { + u32 len; + u16 rev; + u16 cert_type; +}; + +/* WIN_CERTIFICATE_UEFI_GUID */ +struct win_cert_uefi_guid { + struct win_cert hdr; + efi_guid_t cert_type; + u8 cert_data[]; +}; + +/* EFI_FIRMWARE_IMAGE_AUTHENTICATION */ +struct efi_image_auth { + u64 mon_count; + struct win_cert_uefi_guid auth_info; +}; + /* * EFI capsule flags */ -- cgit v1.2.3 From 0db89fa243e5edc5de38c88b369e4c3755c5fb74 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 22 Dec 2021 12:31:41 +0800 Subject: ACPI: Introduce Platform Firmware Runtime Update device driver Introduce the pfr_update driver which can be used for Platform Firmware Runtime code injection and driver update [1]. The user is expected to provide the EFI capsule, and pass it to the driver by writing the capsule to a device special file. The capsule is transferred by the driver to the platform firmware with the help of an ACPI _DSM method under the special ACPI Platform Firmware Runtime Update device (INTC1080), and the actual firmware update is carried out by the low-level Management Mode code in the platform firmware. This change allows certain pieces of the platform firmware to be updated on the fly while the system is running (runtime) without the need to restart it, which is key in the cases when the system needs to be available 100% of the time and it cannot afford the downtime related to restarting it, or when the work carried out by the system is particularly important, so it cannot be interrupted, and it is not practical to wait until it is complete. Link: https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf # [1] Tested-by: Hongyu Ning Signed-off-by: Chen Yu [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/pfrut.h | 174 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 include/uapi/linux/pfrut.h (limited to 'include') diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h new file mode 100644 index 000000000000..fa97e80a93b7 --- /dev/null +++ b/include/uapi/linux/pfrut.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Platform Firmware Runtime Update header + * + * Copyright(c) 2021 Intel Corporation. All rights reserved. + */ +#ifndef __PFRUT_H__ +#define __PFRUT_H__ + +#include +#include + +#define PFRUT_IOCTL_MAGIC 0xEE + +/** + * PFRU_IOC_SET_REV - _IOW(PFRUT_IOCTL_MAGIC, 0x01, unsigned int) + * + * Return: + * * 0 - success + * * -EFAULT - fail to read the revision id + * * -EINVAL - user provides an invalid revision id + * + * Set the Revision ID for Platform Firmware Runtime Update. + */ +#define PFRU_IOC_SET_REV _IOW(PFRUT_IOCTL_MAGIC, 0x01, unsigned int) + +/** + * PFRU_IOC_STAGE - _IOW(PFRUT_IOCTL_MAGIC, 0x02, unsigned int) + * + * Return: + * * 0 - success + * * -EINVAL - stage phase returns invalid result + * + * Stage a capsule image from communication buffer and perform authentication. + */ +#define PFRU_IOC_STAGE _IOW(PFRUT_IOCTL_MAGIC, 0x02, unsigned int) + +/** + * PFRU_IOC_ACTIVATE - _IOW(PFRUT_IOCTL_MAGIC, 0x03, unsigned int) + * + * Return: + * * 0 - success + * * -EINVAL - activate phase returns invalid result + * + * Activate a previously staged capsule image. + */ +#define PFRU_IOC_ACTIVATE _IOW(PFRUT_IOCTL_MAGIC, 0x03, unsigned int) + +/** + * PFRU_IOC_STAGE_ACTIVATE - _IOW(PFRUT_IOCTL_MAGIC, 0x04, unsigned int) + * + * Return: + * * 0 - success + * * -EINVAL - stage/activate phase returns invalid result. + * + * Perform both stage and activation action. + */ +#define PFRU_IOC_STAGE_ACTIVATE _IOW(PFRUT_IOCTL_MAGIC, 0x04, unsigned int) + +/** + * PFRU_IOC_QUERY_CAP - _IOR(PFRUT_IOCTL_MAGIC, 0x05, + * struct pfru_update_cap_info) + * + * Return: + * * 0 - success + * * -EINVAL - query phase returns invalid result + * * -EFAULT - the result fails to be copied to userspace + * + * Retrieve information on the Platform Firmware Runtime Update capability. + * The information is a struct pfru_update_cap_info. + */ +#define PFRU_IOC_QUERY_CAP _IOR(PFRUT_IOCTL_MAGIC, 0x05, struct pfru_update_cap_info) + +/** + * struct pfru_payload_hdr - Capsule file payload header. + * + * @sig: Signature of this capsule file. + * @hdr_version: Revision of this header structure. + * @hdr_size: Size of this header, including the OemHeader bytes. + * @hw_ver: The supported firmware version. + * @rt_ver: Version of the code injection image. + * @platform_id: A platform specific GUID to specify the platform what + * this capsule image support. + */ +struct pfru_payload_hdr { + __u32 sig; + __u32 hdr_version; + __u32 hdr_size; + __u32 hw_ver; + __u32 rt_ver; + __u8 platform_id[16]; +}; + +enum pfru_dsm_status { + DSM_SUCCEED = 0, + DSM_FUNC_NOT_SUPPORT = 1, + DSM_INVAL_INPUT = 2, + DSM_HARDWARE_ERR = 3, + DSM_RETRY_SUGGESTED = 4, + DSM_UNKNOWN = 5, + DSM_FUNC_SPEC_ERR = 6, +}; + +/** + * struct pfru_update_cap_info - Runtime update capability information. + * + * @status: Indicator of whether this query succeed. + * @update_cap: Bitmap to indicate whether the feature is supported. + * @code_type: A buffer containing an image type GUID. + * @fw_version: Platform firmware version. + * @code_rt_version: Code injection runtime version for anti-rollback. + * @drv_type: A buffer containing an image type GUID. + * @drv_rt_version: The version of the driver update runtime code. + * @drv_svn: The secure version number(SVN) of the driver update runtime code. + * @platform_id: A buffer containing a platform ID GUID. + * @oem_id: A buffer containing an OEM ID GUID. + * @oem_info_len: Length of the buffer containing the vendor specific information. + */ +struct pfru_update_cap_info { + __u32 status; + __u32 update_cap; + + __u8 code_type[16]; + __u32 fw_version; + __u32 code_rt_version; + + __u8 drv_type[16]; + __u32 drv_rt_version; + __u32 drv_svn; + + __u8 platform_id[16]; + __u8 oem_id[16]; + + __u32 oem_info_len; +}; + +/** + * struct pfru_com_buf_info - Communication buffer information. + * + * @status: Indicator of whether this query succeed. + * @ext_status: Implementation specific query result. + * @addr_lo: Low 32bit physical address of the communication buffer to hold + * a runtime update package. + * @addr_hi: High 32bit physical address of the communication buffer to hold + * a runtime update package. + * @buf_size: Maximum size in bytes of the communication buffer. + */ +struct pfru_com_buf_info { + __u32 status; + __u32 ext_status; + __u64 addr_lo; + __u64 addr_hi; + __u32 buf_size; +}; + +/** + * struct pfru_updated_result - Platform firmware runtime update result information. + * @status: Indicator of whether this update succeed. + * @ext_status: Implementation specific update result. + * @low_auth_time: Low 32bit value of image authentication time in nanosecond. + * @high_auth_time: High 32bit value of image authentication time in nanosecond. + * @low_exec_time: Low 32bit value of image execution time in nanosecond. + * @high_exec_time: High 32bit value of image execution time in nanosecond. + */ +struct pfru_updated_result { + __u32 status; + __u32 ext_status; + __u64 low_auth_time; + __u64 high_auth_time; + __u64 low_exec_time; + __u64 high_exec_time; +}; + +#endif /* __PFRUT_H__ */ -- cgit v1.2.3 From b0013e037a8b07772c74ce24f1ae4743b30fc3cf Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 22 Dec 2021 12:32:02 +0800 Subject: ACPI: Introduce Platform Firmware Runtime Telemetry driver This driver allows user space to fetch telemetry data from the firmware with the help of the Platform Firmware Runtime Telemetry interface. Both PFRU and PFRT are based on ACPI _DSM interfaces located under special device objects in the ACPI Namespace, but these interfaces are different from each other, so it is better to provide a separate driver from each of them, even though they share some common definitions and naming conventions. Tested-by: Hongyu Ning Signed-off-by: Chen Yu [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/pfrut.h | 88 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h index fa97e80a93b7..42fa15f8310d 100644 --- a/include/uapi/linux/pfrut.h +++ b/include/uapi/linux/pfrut.h @@ -171,4 +171,92 @@ struct pfru_updated_result { __u64 high_exec_time; }; +/** + * struct pfrt_log_data_info - Log Data from telemetry service. + * @status: Indicator of whether this update succeed. + * @ext_status: Implementation specific update result. + * @chunk1_addr_lo: Low 32bit physical address of the telemetry data chunk1 + * starting address. + * @chunk1_addr_hi: High 32bit physical address of the telemetry data chunk1 + * starting address. + * @chunk2_addr_lo: Low 32bit physical address of the telemetry data chunk2 + * starting address. + * @chunk2_addr_hi: High 32bit physical address of the telemetry data chunk2 + * starting address. + * @max_data_size: Maximum supported size of data of all data chunks combined. + * @chunk1_size: Data size in bytes of the telemetry data chunk1 buffer. + * @chunk2_size: Data size in bytes of the telemetry data chunk2 buffer. + * @rollover_cnt: Number of times telemetry data buffer is overwritten + * since telemetry buffer reset. + * @reset_cnt: Number of times telemetry services resets that results in + * rollover count and data chunk buffers are reset. + */ +struct pfrt_log_data_info { + __u32 status; + __u32 ext_status; + __u64 chunk1_addr_lo; + __u64 chunk1_addr_hi; + __u64 chunk2_addr_lo; + __u64 chunk2_addr_hi; + __u32 max_data_size; + __u32 chunk1_size; + __u32 chunk2_size; + __u32 rollover_cnt; + __u32 reset_cnt; +}; + +/** + * struct pfrt_log_info - Telemetry log information. + * @log_level: The telemetry log level. + * @log_type: The telemetry log type(history and execution). + * @log_revid: The telemetry log revision id. + */ +struct pfrt_log_info { + __u32 log_level; + __u32 log_type; + __u32 log_revid; +}; + +/** + * PFRT_LOG_IOC_SET_INFO - _IOW(PFRUT_IOCTL_MAGIC, 0x06, + * struct pfrt_log_info) + * + * Return: + * * 0 - success + * * -EFAULT - fail to get the setting parameter + * * -EINVAL - fail to set the log level + * + * Set the PFRT log level and log type. The input information is + * a struct pfrt_log_info. + */ +#define PFRT_LOG_IOC_SET_INFO _IOW(PFRUT_IOCTL_MAGIC, 0x06, struct pfrt_log_info) + +/** + * PFRT_LOG_IOC_GET_INFO - _IOR(PFRUT_IOCTL_MAGIC, 0x07, + * struct pfrt_log_info) + * + * Return: + * * 0 - success + * * -EINVAL - fail to get the log level + * * -EFAULT - fail to copy the result back to userspace + * + * Retrieve log level and log type of the telemetry. The information is + * a struct pfrt_log_info. + */ +#define PFRT_LOG_IOC_GET_INFO _IOR(PFRUT_IOCTL_MAGIC, 0x07, struct pfrt_log_info) + +/** + * PFRT_LOG_IOC_GET_DATA_INFO - _IOR(PFRUT_IOCTL_MAGIC, 0x08, + * struct pfrt_log_data_info) + * + * Return: + * * 0 - success + * * -EINVAL - fail to get the log buffer information + * * -EFAULT - fail to copy the log buffer information to userspace + * + * Retrieve data information about the telemetry. The information + * is a struct pfrt_log_data_info. + */ +#define PFRT_LOG_IOC_GET_DATA_INFO _IOR(PFRUT_IOCTL_MAGIC, 0x08, struct pfrt_log_data_info) + #endif /* __PFRUT_H__ */ -- cgit v1.2.3 From cf6299b6101903c31bddb0065804b2121ed510c7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 27 Dec 2021 17:39:24 +0100 Subject: kobject: remove kset from struct kset_uevent_ops callbacks There is no need to pass the pointer to the kset in the struct kset_uevent_ops callbacks as no one uses it, so just remove that pointer entirely. Reviewed-by: Rafael J. Wysocki Reviewed-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20211227163924.3970661-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 683172b2e094..ad90b49824dc 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -153,10 +153,9 @@ struct kobj_uevent_env { }; struct kset_uevent_ops { - int (* const filter)(struct kset *kset, struct kobject *kobj); - const char *(* const name)(struct kset *kset, struct kobject *kobj); - int (* const uevent)(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env); + int (* const filter)(struct kobject *kobj); + const char *(* const name)(struct kobject *kobj); + int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env); }; struct kobj_attribute { -- cgit v1.2.3 From d6b9c679bbac1d1d2fcac64391b4cadb91763a6f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 12 Nov 2021 14:46:32 -0800 Subject: watchdog: bcm7038_wdt: Support platform data configuration The BCM7038 watchdog driver needs to be able to obtain a specific clock name on BCM63xx platforms which is the "periph" clock ticking at 50MHz. make it possible to specify the clock name to obtain via platform data. Signed-off-by: Florian Fainelli Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211112224636.395101-4-f.fainelli@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/platform_data/bcm7038_wdt.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 include/linux/platform_data/bcm7038_wdt.h (limited to 'include') diff --git a/include/linux/platform_data/bcm7038_wdt.h b/include/linux/platform_data/bcm7038_wdt.h new file mode 100644 index 000000000000..e18cfd9ec8f9 --- /dev/null +++ b/include/linux/platform_data/bcm7038_wdt.h @@ -0,0 +1,8 @@ +#ifndef __BCM7038_WDT_PDATA_H +#define __BCM7038_WDT_PDATA_H + +struct bcm7038_wdt_platform_data { + const char *clk_name; +}; + +#endif /* __BCM7038_WDT_PDATA_H */ -- cgit v1.2.3 From db3c65bc3a1308db8c914b2bf477b5a36005c3d3 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 27 Dec 2021 19:31:54 -0800 Subject: Drivers: hv: Fix definition of hypercall input & output arg variables The percpu variables hyperv_pcpu_input_arg and hyperv_pcpu_output_arg have been incorrectly defined since their inception. The __percpu qualifier should be associated with the void * (i.e., a pointer), not with the target of the pointer. This distinction makes no difference to gcc and the generated code, but sparse correctly complains. Fix the definitions in the interest of general correctness in addition to making sparse happy. No functional change. Reported-by: kernel test robot Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1640662315-22260-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- include/asm-generic/mshyperv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 94e73ba129c5..c08758b6b364 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -49,8 +49,8 @@ struct ms_hyperv_info { }; extern struct ms_hyperv_info ms_hyperv; -extern void __percpu **hyperv_pcpu_input_arg; -extern void __percpu **hyperv_pcpu_output_arg; +extern void * __percpu *hyperv_pcpu_input_arg; +extern void * __percpu *hyperv_pcpu_output_arg; extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); -- cgit v1.2.3 From b92e301633f0f454aa1cfedac2e096bb9649b367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 18 Dec 2021 16:25:53 +0100 Subject: mfd: ntxec: Change return type of ntxec_reg8 from __be16 to u16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Register values in NTXEC are big-endian on the I2C bus, but the regmap subsystem handles the conversion between CPU-endian and big-endian data internally. ntxec_reg8 should thus return u16, not __be16. Reported-by: kernel test robot Signed-off-by: Jonathan Neuschäfer Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20211218152553.744615-1-j.neuschaefer@gmx.net --- include/linux/mfd/ntxec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/ntxec.h b/include/linux/mfd/ntxec.h index 26ab3b8eb612..cc6f07bfa2b3 100644 --- a/include/linux/mfd/ntxec.h +++ b/include/linux/mfd/ntxec.h @@ -26,7 +26,7 @@ struct ntxec { * This convenience function converts an 8-bit value to 16-bit for use in the * second kind of register. */ -static inline __be16 ntxec_reg8(u8 value) +static inline u16 ntxec_reg8(u8 value) { return value << 8; } -- cgit v1.2.3 From b6459415b384cb829f0b2a4268f211c789f6cf0b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Dec 2021 16:49:13 -0800 Subject: net: Don't include filter.h from net/sock.h sock.h is pretty heavily used (5k objects rebuilt on x86 after it's touched). We can drop the include of filter.h from it and add a forward declaration of struct sk_filter instead. This decreases the number of rebuilt objects when bpf.h is touched from ~5k to ~1k. There's a lot of missing includes this was masking. Primarily in networking tho, this time. Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov Acked-by: Marc Kleine-Budde Acked-by: Florian Fainelli Acked-by: Nikolay Aleksandrov Acked-by: Stefano Garzarella Link: https://lore.kernel.org/bpf/20211229004913.513372-1-kuba@kernel.org --- include/linux/bpf_local_storage.h | 1 + include/linux/dsa/loop.h | 1 + include/net/ipv6.h | 2 ++ include/net/route.h | 1 + include/net/sock.h | 2 +- include/net/xdp_sock.h | 1 + 6 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 24496bc28e7b..a2b625960ffe 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -8,6 +8,7 @@ #define _BPF_LOCAL_STORAGE_H #include +#include #include #include #include diff --git a/include/linux/dsa/loop.h b/include/linux/dsa/loop.h index 5a3470bcc8a7..b8fef35591aa 100644 --- a/include/linux/dsa/loop.h +++ b/include/linux/dsa/loop.h @@ -2,6 +2,7 @@ #ifndef DSA_LOOP_H #define DSA_LOOP_H +#include #include #include #include diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 53ac7707ca70..3afcb128e064 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -21,6 +21,8 @@ #include #include +struct ip_tunnel_info; + #define SIN6_LEN_RFC2133 24 #define IPV6_MAXPLEN 65535 diff --git a/include/net/route.h b/include/net/route.h index 2e6c0e153e3a..4c858dcf1aa8 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -43,6 +43,7 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) +struct ip_tunnel_info; struct fib_nh; struct fib_info; struct uncached_list; diff --git a/include/net/sock.h b/include/net/sock.h index 37f878564d25..40f6406b9ca5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -249,6 +248,7 @@ struct sock_common { }; struct bpf_local_storage; +struct sk_filter; /** * struct sock - network layer representation of sockets diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index fff069d2ed1b..3057e1a4a11c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -6,6 +6,7 @@ #ifndef _LINUX_XDP_SOCK_H #define _LINUX_XDP_SOCK_H +#include #include #include #include -- cgit v1.2.3 From 1bb412d46ca9df90a3fe4b704f5385f03621455d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 29 Dec 2021 16:09:37 +0800 Subject: net_tstamp: define new flag HWTSTAMP_FLAG_BONDED_PHC_INDEX As we defined the new hwtstamp_config flag HWTSTAMP_FLAG_BONDED_PHC_INDEX as enum, it's not easy for userspace program to check if the flag is supported when build. Let's define the new flag so user space could build it on old kernel with ifdef check. Fixes: 9c9211a3fc7a ("net_tstamp: add new flag HWTSTAMP_FLAG_BONDED_PHC_INDEX") Signed-off-by: Hangbin Liu Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_tstamp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index e258e52cfd1f..55501e5e7ac8 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -87,6 +87,7 @@ enum hwtstamp_flags { * will be the PHC index. */ HWTSTAMP_FLAG_BONDED_PHC_INDEX = (1<<0), +#define HWTSTAMP_FLAG_BONDED_PHC_INDEX HWTSTAMP_FLAG_BONDED_PHC_INDEX HWTSTAMP_FLAG_LAST = HWTSTAMP_FLAG_BONDED_PHC_INDEX, HWTSTAMP_FLAG_MASK = (HWTSTAMP_FLAG_LAST - 1) | HWTSTAMP_FLAG_LAST -- cgit v1.2.3 From 0fe4b381a59ebc53522fce579b281a67a9e1bee6 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 24 Dec 2021 15:29:15 +0000 Subject: bpf: Allow bpf_local_storage to be used by sleepable programs Other maps like hashmaps are already available to sleepable programs. Sleepable BPF programs run under trace RCU. Allow task, sk and inode storage to be used from sleepable programs. This allows sleepable and non-sleepable programs to provide shareable annotations on kernel objects. Sleepable programs run in trace RCU where as non-sleepable programs run in a normal RCU critical section i.e. __bpf_prog_enter{_sleepable} and __bpf_prog_exit{_sleepable}) (rcu_read_lock or rcu_read_lock_trace). In order to make the local storage maps accessible to both sleepable and non-sleepable programs, one needs to call both call_rcu_tasks_trace and call_rcu to wait for both trace and classical RCU grace periods to expire before freeing memory. Paul's work on call_rcu_tasks_trace allows us to have per CPU queueing for call_rcu_tasks_trace. This behaviour can be achieved by setting rcupdate.rcu_task_enqueue_lim= boot parameter. In light of these new performance changes and to keep the local storage code simple, avoid adding a new flag for sleepable maps / local storage to select the RCU synchronization (trace / classical). Also, update the dereferencing of the pointers to use rcu_derference_check (with either the trace or normal RCU locks held) with a common bpf_rcu_lock_held helper method. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211224152916.1550677-2-kpsingh@kernel.org --- include/linux/bpf_local_storage.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index a2b625960ffe..37b3906af8b1 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -17,6 +17,9 @@ #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 +#define bpf_rcu_lock_held() \ + (rcu_read_lock_held() || rcu_read_lock_trace_held() || \ + rcu_read_lock_bh_held()) struct bpf_local_storage_map_bucket { struct hlist_head list; raw_spinlock_t lock; @@ -162,4 +165,6 @@ struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, void *value, u64 map_flags); +void bpf_local_storage_free_rcu(struct rcu_head *rcu); + #endif /* _BPF_LOCAL_STORAGE_H */ -- cgit v1.2.3 From 3b80b73a4b3de38f72cd79e1a157449917f2bcb5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Dec 2021 17:27:41 -0800 Subject: net: Add includes masked by netdevice.h including uapi/bpf.h Add missing includes unmasked by the subsequent change. Mostly network drivers missing an include for XDP_PACKET_HEADROOM. Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211230012742.770642-2-kuba@kernel.org --- include/net/ip6_fib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 83b8070d1cc9..a9a4ccc0cdb5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 -- cgit v1.2.3 From aebb51ec3db2a871d74b4afad3f9914812acf120 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Dec 2021 17:27:42 -0800 Subject: bpf: Invert the dependency between bpf-netns.h and netns/bpf.h netns/bpf.h gets included by netdevice.h (thru net_namespace.h) which in turn gets included in a lot of places. We should keep netns/bpf.h as light-weight as possible. bpf-netns.h seems to contain more implementation details than deserves to be included in a netns header. It needs to pull in uapi/bpf.h to get various enum types. Move enum netns_bpf_attach_type to netns/bpf.h and invert the dependency. This makes netns/bpf.h fit the mold of a struct definition header more clearly, and drops the number of objects rebuilt when uapi/bpf.h is touched from 7.7k to 1.1k. Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211230012742.770642-3-kuba@kernel.org --- include/linux/bpf-netns.h | 8 +------- include/net/netns/bpf.h | 9 ++++++++- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h index 722f799c1a2e..413cfa5e4b07 100644 --- a/include/linux/bpf-netns.h +++ b/include/linux/bpf-netns.h @@ -3,15 +3,9 @@ #define _BPF_NETNS_H #include +#include #include -enum netns_bpf_attach_type { - NETNS_BPF_INVALID = -1, - NETNS_BPF_FLOW_DISSECTOR = 0, - NETNS_BPF_SK_LOOKUP, - MAX_NETNS_BPF_ATTACH_TYPE -}; - static inline enum netns_bpf_attach_type to_netns_bpf_attach_type(enum bpf_attach_type attach_type) { diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index 0ca6a1b87185..2c01a278d1eb 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -6,11 +6,18 @@ #ifndef __NETNS_BPF_H__ #define __NETNS_BPF_H__ -#include +#include struct bpf_prog; struct bpf_prog_array; +enum netns_bpf_attach_type { + NETNS_BPF_INVALID = -1, + NETNS_BPF_FLOW_DISSECTOR = 0, + NETNS_BPF_SK_LOOKUP, + MAX_NETNS_BPF_ATTACH_TYPE +}; + struct netns_bpf { /* Array of programs to run compiled from progs or links */ struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; -- cgit v1.2.3 From 882c982dada4d53079c56de94ccbce1e21cc675f Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 23 Dec 2021 11:16:17 +0300 Subject: acpi: Store CRC-32 hash of the _PLD in struct acpi_device Storing CRC-32 hash of the Physical Location of Device object (_PLD) with devices that have it. The hash is stored to a new struct acpi_device member "pld_crc". The hash makes it easier to find devices that share a location, as there is no need to evaluate the entire object every time. Knowledge about devices that share a location can be used in device drivers that need to know the connections to other components inside a system. USB3 ports will for example always share their location with a USB2 port. Acked-by: Rafael J. Wysocki Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20211223081620.45479-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/acpi/acpi_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 480f9207a4c6..3a1280649ddf 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -358,6 +358,7 @@ struct acpi_gpio_mapping; /* Device */ struct acpi_device { + u32 pld_crc; int device_type; acpi_handle handle; /* no handle for fixed hardware */ struct fwnode_handle fwnode; -- cgit v1.2.3 From 730b49aac426e1e8016d3c2dd6b407e500423821 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 23 Dec 2021 11:24:22 +0300 Subject: usb: typec: port-mapper: Convert to the component framework Instead of trying to keep track of the connections to the USB Type-C connectors separately, letting the component framework take care of that. From now on every USB Type-C connector will register itself as "aggregate" - component master - and anything that can be connected to it inside the system can then simply register itself as a generic component. The matching of the components and the connector shall rely on ACPI _PLD initially. Before registering itself as the aggregate, the connector will find all other ACPI devices that have matching _PLD crc hash with it (matching value in the pld_crc member of struct acpi_device), and add a component match entry for each one of them. Because only ACPI is supported for now, the driver shall only be build when ACPI is supported. This removes the need for the custom API that the driver exposed. The components and the connector can therefore exist completely independently of each other. The order in which they are registered, as well as are they modules or not, is now irrelevant. Acked-by: Rafael J. Wysocki Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20211223082422.45637-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index e2e44bb1dad8..7ba45a97eeae 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -305,16 +305,4 @@ void typec_partner_set_svdm_version(struct typec_partner *partner, enum usb_pd_svdm_ver svdm_version); int typec_get_negotiated_svdm_version(struct typec_port *port); -#if IS_REACHABLE(CONFIG_TYPEC) -int typec_link_port(struct device *port); -void typec_unlink_port(struct device *port); -#else -static inline int typec_link_port(struct device *port) -{ - return 0; -} - -static inline void typec_unlink_port(struct device *port) { } -#endif - #endif /* __LINUX_USB_TYPEC_H */ -- cgit v1.2.3 From 510a0bdb2bfcff8d7be822c72adc3add7a97d559 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 23 Dec 2021 11:24:32 +0300 Subject: usb: Remove usb_for_each_port() There are no more users for the function. Reviewed-by: Andy Shevchenko Acked-by: Rafael J. Wysocki Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20211223082432.45653-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7ccaa76a9a96..200b7b79acb5 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -875,15 +875,6 @@ extern struct usb_host_interface *usb_find_alt_setting( unsigned int iface_num, unsigned int alt_num); -#if IS_REACHABLE(CONFIG_USB) -int usb_for_each_port(void *data, int (*fn)(struct device *, void *)); -#else -static inline int usb_for_each_port(void *data, int (*fn)(struct device *, void *)) -{ - return 0; -} -#endif - /* port claiming functions */ int usb_hub_claim_port(struct usb_device *hdev, unsigned port1, struct usb_dev_state *owner); -- cgit v1.2.3 From b4a29b94804c4774f22555651296b838df6ec0e4 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 28 Dec 2021 18:22:00 +0100 Subject: serial: 8250: Move Alpha-specific quirk out of the core struct uart_8250_port contains mcr_mask and mcr_force members whose sole purpose is to work around an Alpha-specific quirk. This code doesn't belong in the core where it is executed by everyone else, so move it to a proper ->set_mctrl callback which is used on the affected Alpha machine only. The quirk was introduced in January 1995: https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/diff/drivers/char/serial.c?h=1.1.83 The members in struct uart_8250_port were added in 2002: https://git.kernel.org/history/history/c/4524aad27854 The quirk applies to non-PCI Alphas and arch/alpha/Kconfig specifies "select FORCE_PCI if !ALPHA_JENSEN". So apparently the only affected machine is the EISA-based Jensen that Linus was working on back then: https://lore.kernel.org/all/CAHk-=wj1JWZ3sCrGz16nxEj7=0O+srMg6Ah3iPTDXSPKEws_SA@mail.gmail.com/ Up until now the quirk is not applied unless CONFIG_PCI is disabled. If users forget to do that or run a generic Alpha kernel, the serial ports aren't usable on Jensen. Avoid by confining the quirk to CONFIG_ALPHA_JENSEN instead of !CONFIG_PCI. On generic Alpha kernels, auto-detect at runtime whether the quirk needs to be applied. Cc: Russell King Cc: Ulrich Teichert Cc: Linus Torvalds Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/b83d069cb516549b8a5420e097bb6bdd806f36fc.1640695609.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 5db211f43b29..ff84a3ed10ea 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -104,8 +104,6 @@ struct uart_8250_port { unsigned char ier; unsigned char lcr; unsigned char mcr; - unsigned char mcr_mask; /* mask of user bits */ - unsigned char mcr_force; /* mask of forced bits */ unsigned char cur_iotype; /* Running I/O type */ unsigned int rpm_tx_active; unsigned char canary; /* non-zero during system sleep -- cgit v1.2.3 From d8e9a406a931f687945703a4bac45042eb81ce92 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sun, 12 Dec 2021 22:21:28 +0900 Subject: serdev: BREAK/FRAME/PARITY/OVERRUN notification prototype V2 Allow serdev device drivers get notified by hardware errors such as BREAK, FRAME, PARITY and OVERRUN. With this patch, in the event of an error detected in the UART device driver the serdev_device_driver will get the newly introduced ->error() callback invoked if serdev_device_set_error_mask() has previously been used to enable the type of error. The errors are taken straight from the TTY layer and fed into the serdev_device_driver after filtering out only enabled errors. Without this patch the hardware errors never reach the serdev_device_driver. Signed-off-by: Magnus Damm Link: https://lore.kernel.org/r/163931528842.27756.3665040315954968747.sendpatchset@octo Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 3368c261ab62..0d0b22fc7e37 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -19,12 +19,15 @@ struct serdev_device; /** * struct serdev_device_ops - Callback operations for a serdev device + * @error: Function called with errors received from device; + * may sleep. * @receive_buf: Function called with data received from device; * returns number of bytes accepted; may sleep. * @write_wakeup: Function called when ready to transmit more data; must * not sleep. */ struct serdev_device_ops { + void (*error)(struct serdev_device *, unsigned long); int (*receive_buf)(struct serdev_device *, const unsigned char *, size_t); void (*write_wakeup)(struct serdev_device *); }; @@ -76,6 +79,11 @@ enum serdev_parity { SERDEV_PARITY_ODD, }; +#define SERDEV_ERROR_BREAK 0 +#define SERDEV_ERROR_FRAME 1 +#define SERDEV_ERROR_PARITY 2 +#define SERDEV_ERROR_OVERRUN 3 + /* * serdev controller structures */ @@ -85,6 +93,7 @@ struct serdev_controller_ops { int (*write_room)(struct serdev_controller *); int (*open)(struct serdev_controller *); void (*close)(struct serdev_controller *); + void (*set_error_mask)(struct serdev_controller *, unsigned long); void (*set_flow_control)(struct serdev_controller *, bool); int (*set_parity)(struct serdev_controller *, enum serdev_parity); unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int); @@ -190,12 +199,24 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, return serdev->ops->receive_buf(serdev, data, count); } +static inline void serdev_controller_error(struct serdev_controller *ctrl, + unsigned long errors) +{ + struct serdev_device *serdev = ctrl->serdev; + + if (!serdev || !serdev->ops->error) + return; + + serdev->ops->error(serdev, errors); +} + #if IS_ENABLED(CONFIG_SERIAL_DEV_BUS) int serdev_device_open(struct serdev_device *); void serdev_device_close(struct serdev_device *); int devm_serdev_device_open(struct device *, struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); +void serdev_device_set_error_mask(struct serdev_device *, unsigned long); void serdev_device_set_flow_control(struct serdev_device *, bool); int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); void serdev_device_wait_until_sent(struct serdev_device *, long); @@ -238,6 +259,7 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev { return 0; } +static inline void serdev_device_set_error_mask(struct serdev_device *sdev, unsigned long mask) {} static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} static inline int serdev_device_write_buf(struct serdev_device *serdev, const unsigned char *buf, -- cgit v1.2.3 From 5207fb2f311b0c45a9abfa1c84b7a7b657ffa550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 30 Dec 2021 16:02:41 +0100 Subject: counter: Provide a wrapper to access device private data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now this just wraps accessing struct counter_device::priv. However this is about to change and converting drivers to this helper individually makes fixing device lifetime issues result in easier to review patches. Reviewed-by: Jonathan Cameron Acked-by: William Breathitt Gray Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20211230150300.72196-5-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/counter.h b/include/linux/counter.h index dfbde2808998..627f1757f6bb 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -329,6 +329,8 @@ struct counter_device { struct mutex ops_exist_lock; }; +void *counter_priv(const struct counter_device *const counter); + int counter_register(struct counter_device *const counter); void counter_unregister(struct counter_device *const counter); int devm_counter_register(struct device *dev, -- cgit v1.2.3 From c18e2760308e30f007fa24b558b87c39d7e86ff1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 30 Dec 2021 16:02:50 +0100 Subject: counter: Provide alternative counter registration functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation gets device lifetime tracking wrong. The problem is that allocation of struct counter_device is controlled by the individual drivers but this structure contains a struct device that might have to live longer than a driver is bound. As a result a command sequence like: { sleep 5; echo bang; } > /dev/counter0 & sleep 1; echo 40000000.timer:counter > /sys/bus/platform/drivers/stm32-timer-counter/unbind can keep a reference to the struct device and unbinding results in freeing the memory occupied by this device resulting in an oops. This commit provides two new functions (plus some helpers): - counter_alloc() to allocate a struct counter_device that is automatically freed once the embedded struct device is released - counter_add() to register such a device. Note that this commit doesn't fix any issues, all drivers have to be converted to these new functions to correct the lifetime problems. Reviewed-by: Jonathan Cameron Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20211230150300.72196-14-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/counter.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/counter.h b/include/linux/counter.h index 627f1757f6bb..ed8d5820f0d1 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -327,14 +327,29 @@ struct counter_device { spinlock_t events_in_lock; struct mutex events_out_lock; struct mutex ops_exist_lock; + + /* + * This can go away once all drivers are converted to + * counter_alloc()/counter_add(). + */ + bool legacy_device; }; void *counter_priv(const struct counter_device *const counter); int counter_register(struct counter_device *const counter); + +struct counter_device *counter_alloc(size_t sizeof_priv); +void counter_put(struct counter_device *const counter); +int counter_add(struct counter_device *const counter); + void counter_unregister(struct counter_device *const counter); int devm_counter_register(struct device *dev, struct counter_device *const counter); +struct counter_device *devm_counter_alloc(struct device *dev, + size_t sizeof_priv); +int devm_counter_add(struct device *dev, + struct counter_device *const counter); void counter_push_event(struct counter_device *const counter, const u8 event, const u8 channel); -- cgit v1.2.3 From f2ee4759fb700b32a1bd830960fe86bf6bdfd0ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 30 Dec 2021 16:03:00 +0100 Subject: counter: remove old and now unused registration API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Usage of counter_register() yields issues in device lifetime tracking. All drivers were converted to the new API, so the old one can go away. Reviewed-by: Jonathan Cameron Acked-by: William Breathitt Gray Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20211230150300.72196-24-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/counter.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/counter.h b/include/linux/counter.h index ed8d5820f0d1..1fe17f5adb09 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -314,8 +314,6 @@ struct counter_device { struct counter_comp *ext; size_t num_ext; - void *priv; - struct device dev; struct cdev chrdev; struct list_head events_list; @@ -327,25 +325,15 @@ struct counter_device { spinlock_t events_in_lock; struct mutex events_out_lock; struct mutex ops_exist_lock; - - /* - * This can go away once all drivers are converted to - * counter_alloc()/counter_add(). - */ - bool legacy_device; }; void *counter_priv(const struct counter_device *const counter); -int counter_register(struct counter_device *const counter); - struct counter_device *counter_alloc(size_t sizeof_priv); void counter_put(struct counter_device *const counter); int counter_add(struct counter_device *const counter); void counter_unregister(struct counter_device *const counter); -int devm_counter_register(struct device *dev, - struct counter_device *const counter); struct counter_device *devm_counter_alloc(struct device *dev, size_t sizeof_priv); int devm_counter_add(struct device *dev, -- cgit v1.2.3 From fb0b00af04d083770d6e2762b2838357519f7d2d Mon Sep 17 00:00:00 2001 From: Jinzhou Su Date: Fri, 24 Dec 2021 09:04:59 +0800 Subject: ACPI: CPPC: Add CPPC enable register function Add a new function to enable CPPC feature. This function will write Continuous Performance Control package EnableRegister field on the processor. CPPC EnableRegister register described in section 8.4.7.1 of ACPI 6.4: This element is optional. If supported, contains a resource descriptor with a single Register() descriptor that describes a register to which OSPM writes a One to enable CPPC on this processor. Before this register is set, the processor will be controlled by legacy mechanisms (ACPI Pstates, firmware, etc.). This register will be used for AMD processors to enable AMD P-State function instead of legacy ACPI P-States. Signed-off-by: Jinzhou Su Signed-off-by: Huang Rui Signed-off-by: Rafael J. Wysocki --- include/acpi/cppc_acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index bc159a9b4a73..92b7ea8d8f5e 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -138,6 +138,7 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); +extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); extern bool acpi_cpc_valid(void); extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); @@ -162,6 +163,10 @@ static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { return -ENOTSUPP; } +static inline int cppc_set_enable(int cpu, bool enable) +{ + return -ENOTSUPP; +} static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) { return -ENOTSUPP; -- cgit v1.2.3 From 35f9e773bb883a1be87410570f92c8c438e0478b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 30 Dec 2021 15:17:20 +0100 Subject: ACPI / x86: Add acpi_quirk_skip_[i2c_client|serdev]_enumeration() helpers x86 ACPI boards which ship with only Android as their factory image usually declare a whole bunch of bogus I2C devs in their ACPI tables and sometimes there are issues with serdev devices on these boards too, e.g. the resource points to the wrong serdev_controller. Instantiating I2C / serdev devs for these bogus devs causes various issues, e.g. GPIO/IRQ resource conflicts because sometimes drivers do bind to them. The Android x86 kernel fork shipped on these devices has some special code to remove the bogus I2C clients (and serdevs are ignored completely). Introduce acpi_quirk_skip_i2c_client_enumeration() and acpi_quirk_skip_serdev_enumeration() helpers. Which can be used by the I2C/ serdev code to skip instantiating any I2C or serdev devs on broken boards. These 2 helpers are added to drivers/acpi/x86/utils.c so that the DMI table can be shared between the I2C and serdev code. Note these boards typically do actually have I2C and serdev devices, just different ones then the ones described in their DSDT. The devices which are actually present are manually instantiated by the drivers/platform/x86/x86-android-tablets.c kernel module. The new helpers are only build if CONFIG_X86_ANDROID_TABLETS is enabled, otherwise they are empty stubs to not unnecessarily grow the kernel size. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 17f700c9a4f9..88f21780447b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -622,6 +622,22 @@ static inline bool acpi_device_always_present(struct acpi_device *adev) } #endif +#if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS) +bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev); +int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip); +#else +static inline bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev) +{ + return false; +} +static inline int +acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip) +{ + *skip = false; + return 0; +} +#endif + #ifdef CONFIG_PM void acpi_pm_wakeup_event(struct device *dev); acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, -- cgit v1.2.3 From d6c6d0bb2cb3af91c9c1546af7cdf4770d0df443 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 29 Dec 2021 12:36:20 +0100 Subject: net: remove references to CONFIG_IRDA in network header files Commit d64c2a76123f ("staging: irda: remove the irda network stack and drivers") removes the config IRDA. Remove the remaining references to this non-existing config in the network header files. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20211229113620.19368-1-lukas.bulwahn@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/atalk.h | 2 +- include/linux/netdevice.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/atalk.h b/include/linux/atalk.h index f6034ba774be..a55bfc6567d0 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -113,7 +113,7 @@ extern int aarp_proto_init(void); /* Inter module exports */ /* Give a device find its atif control structure */ -#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK) +#if IS_ENABLED(CONFIG_ATALK) static inline struct atalk_iface *atalk_find_dev(struct net_device *dev) { return dev->atalk_ptr; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2684bdb6defa..6f99c8f51b60 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2098,7 +2098,7 @@ struct net_device { #if IS_ENABLED(CONFIG_TIPC) struct tipc_bearer __rcu *tipc_ptr; #endif -#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK) +#if IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; #endif struct in_device __rcu *ip_ptr; -- cgit v1.2.3 From c3fb0e280b4cdbf382f59eb6b276e4c6047bc219 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 18 Nov 2021 02:32:37 +0200 Subject: net/mlx5: DR, Fix lower case macro prefix "mlx5_" to "MLX5_" Macros prefix should be capital letters - fix the prefix in mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED. Signed-off-by: Yevgeny Kliteynik --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e9db12aae8f9..18b816b41545 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1291,7 +1291,7 @@ enum { enum { MLX5_FLEX_PARSER_GENEVE_ENABLED = 1 << 3, MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED = 1 << 4, - mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED = 1 << 5, + MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED = 1 << 5, MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED = 1 << 7, MLX5_FLEX_PARSER_ICMP_V4_ENABLED = 1 << 8, MLX5_FLEX_PARSER_ICMP_V6_ENABLED = 1 << 9, -- cgit v1.2.3 From 0f2a6c3b9219bdf497750258cd2ad513f0056b42 Mon Sep 17 00:00:00 2001 From: Muhammad Sammar Date: Sun, 5 Sep 2021 15:16:21 +0300 Subject: net/mlx5: Add misc5 flow table match parameters Add support for misc5 match parameter as per HW spec, this will allow matching on tunnel_header fields. Signed-off-by: Muhammad Sammar Signed-off-by: Yevgeny Kliteynik --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 25 ++++++++++++++++++++++++- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 2 +- 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9c25edfd59a6..604b85dd770a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1117,6 +1117,7 @@ enum { MLX5_MATCH_MISC_PARAMETERS_2 = 1 << 3, MLX5_MATCH_MISC_PARAMETERS_3 = 1 << 4, MLX5_MATCH_MISC_PARAMETERS_4 = 1 << 5, + MLX5_MATCH_MISC_PARAMETERS_5 = 1 << 6, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 18b816b41545..c74c5e147cb9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -670,6 +670,26 @@ struct mlx5_ifc_fte_match_set_misc4_bits { u8 reserved_at_100[0x100]; }; +struct mlx5_ifc_fte_match_set_misc5_bits { + u8 macsec_tag_0[0x20]; + + u8 macsec_tag_1[0x20]; + + u8 macsec_tag_2[0x20]; + + u8 macsec_tag_3[0x20]; + + u8 tunnel_header_0[0x20]; + + u8 tunnel_header_1[0x20]; + + u8 tunnel_header_2[0x20]; + + u8 tunnel_header_3[0x20]; + + u8 reserved_at_100[0x100]; +}; + struct mlx5_ifc_cmd_pas_bits { u8 pa_h[0x20]; @@ -1839,7 +1859,9 @@ struct mlx5_ifc_fte_match_param_bits { struct mlx5_ifc_fte_match_set_misc4_bits misc_parameters_4; - u8 reserved_at_c00[0x400]; + struct mlx5_ifc_fte_match_set_misc5_bits misc_parameters_5; + + u8 reserved_at_e00[0x200]; }; enum { @@ -5977,6 +5999,7 @@ enum { MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0x3, MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_3 = 0x4, MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_4 = 0x5, + MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_5 = 0x6, }; struct mlx5_ifc_query_flow_group_out_bits { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index ca2372864b70..e539c84d63f1 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -252,7 +252,7 @@ enum mlx5_ib_device_query_context_attrs { MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX = (1U << UVERBS_ID_NS_SHIFT), }; -#define MLX5_IB_DW_MATCH_PARAM 0x90 +#define MLX5_IB_DW_MATCH_PARAM 0xA0 struct mlx5_ib_match_params { __u32 match_params[MLX5_IB_DW_MATCH_PARAM]; -- cgit v1.2.3 From f59464e257bdbd4df6df9a4505d7858a0baf6cf7 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 8 Nov 2021 02:42:50 +0200 Subject: net/mlx5: DR, Add support for matching on geneve_tlv_option_0_exist field Match on geneve_tlv_option_0_exist field on devices that support STEv1. Signed-off-by: Muhammad Sammar Signed-off-by: Yevgeny Kliteynik --- include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c74c5e147cb9..deaa0f71213f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -372,7 +372,8 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 reserved_at_37[0x9]; u8 geneve_tlv_option_0_data[0x1]; - u8 reserved_at_41[0x4]; + u8 geneve_tlv_option_0_exist[0x1]; + u8 reserved_at_42[0x3]; u8 outer_first_mpls_over_udp[0x4]; u8 outer_first_mpls_over_gre[0x4]; u8 inner_first_mpls[0x4]; @@ -551,7 +552,8 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 bth_opcode[0x8]; u8 geneve_vni[0x18]; - u8 reserved_at_d8[0x7]; + u8 reserved_at_d8[0x6]; + u8 geneve_tlv_option_0_exist[0x1]; u8 geneve_oam[0x1]; u8 reserved_at_e0[0xc]; -- cgit v1.2.3 From 4ff725e1d4adfd313bc9767523fc8d6e90d50f9c Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 23 Nov 2021 02:11:12 +0200 Subject: net/mlx5: DR, Ignore modify TTL if device doesn't support it When modifying TTL, packet's csum has to be recalculated. Due to HW issue in ConnectX-5, csum recalculation for modify TTL is supported through a work-around that is specifically enabled by configuration. If the work-around isn't enabled, ignore the modify TTL action rather than adding an unsupported action. Signed-off-by: Yevgeny Kliteynik --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index deaa0f71213f..598ac3bcc901 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -833,7 +833,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 fdb_to_vport_reg_c_id[0x8]; u8 reserved_at_8[0xd]; u8 fdb_modify_header_fwd_to_table[0x1]; - u8 reserved_at_16[0x1]; + u8 fdb_ipv4_ttl_modify[0x1]; u8 flow_source[0x1]; u8 reserved_at_18[0x2]; u8 multi_fdb_encap[0x1]; -- cgit v1.2.3 From 99a507a8ea28542ec196e2dd80096708e2482735 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 31 Dec 2021 13:42:30 +0100 Subject: Revert "serdev: BREAK/FRAME/PARITY/OVERRUN notification prototype V2" This reverts commit d8e9a406a931f687945703a4bac45042eb81ce92. It needs some future changes as pointed out by Johan and is not ready to be merged just yet. Reported-by: Johan Hovold Cc: Magnus Damm Link: https://lore.kernel.org/r/Yc7oZ/1tu95Z4wPS@hovoldconsulting.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 0d0b22fc7e37..3368c261ab62 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -19,15 +19,12 @@ struct serdev_device; /** * struct serdev_device_ops - Callback operations for a serdev device - * @error: Function called with errors received from device; - * may sleep. * @receive_buf: Function called with data received from device; * returns number of bytes accepted; may sleep. * @write_wakeup: Function called when ready to transmit more data; must * not sleep. */ struct serdev_device_ops { - void (*error)(struct serdev_device *, unsigned long); int (*receive_buf)(struct serdev_device *, const unsigned char *, size_t); void (*write_wakeup)(struct serdev_device *); }; @@ -79,11 +76,6 @@ enum serdev_parity { SERDEV_PARITY_ODD, }; -#define SERDEV_ERROR_BREAK 0 -#define SERDEV_ERROR_FRAME 1 -#define SERDEV_ERROR_PARITY 2 -#define SERDEV_ERROR_OVERRUN 3 - /* * serdev controller structures */ @@ -93,7 +85,6 @@ struct serdev_controller_ops { int (*write_room)(struct serdev_controller *); int (*open)(struct serdev_controller *); void (*close)(struct serdev_controller *); - void (*set_error_mask)(struct serdev_controller *, unsigned long); void (*set_flow_control)(struct serdev_controller *, bool); int (*set_parity)(struct serdev_controller *, enum serdev_parity); unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int); @@ -199,24 +190,12 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, return serdev->ops->receive_buf(serdev, data, count); } -static inline void serdev_controller_error(struct serdev_controller *ctrl, - unsigned long errors) -{ - struct serdev_device *serdev = ctrl->serdev; - - if (!serdev || !serdev->ops->error) - return; - - serdev->ops->error(serdev, errors); -} - #if IS_ENABLED(CONFIG_SERIAL_DEV_BUS) int serdev_device_open(struct serdev_device *); void serdev_device_close(struct serdev_device *); int devm_serdev_device_open(struct device *, struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); -void serdev_device_set_error_mask(struct serdev_device *, unsigned long); void serdev_device_set_flow_control(struct serdev_device *, bool); int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); void serdev_device_wait_until_sent(struct serdev_device *, long); @@ -259,7 +238,6 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev { return 0; } -static inline void serdev_device_set_error_mask(struct serdev_device *sdev, unsigned long mask) {} static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} static inline int serdev_device_write_buf(struct serdev_device *serdev, const unsigned char *buf, -- cgit v1.2.3 From a87d42227cf5614fe0040ddd1fe642c54298b42c Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 17 Dec 2021 11:56:59 +0000 Subject: ASoC: cs35l41: Convert tables to shared source code To support CS35L41 in HDA systems the HDA driver for CS35L41 would have to duplicate some functions that already exist on ASoC driver So instead of duplicate the code, use the new lib source as a shared resource for both ASoC and HDA Also, change the way CONFIG_SND_SOC_CS35L41 is selected, as reported by Intel Kernel test robot, it is possible to build SND_SOC_CS35L41_SPI/I2C without the main driver, which would lead to build failures. Signed-off-by: Lucas Tanure Reported-by: kernel test robot Link: https://lore.kernel.org/r/20211217115708.882525-2-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l41.h | 733 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 733 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index 1f1e3c6c9be1..aac3ffb9bc89 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -10,6 +10,721 @@ #ifndef __CS35L41_H #define __CS35L41_H +#include + +#define CS35L41_FIRSTREG 0x00000000 +#define CS35L41_LASTREG 0x03804FE8 +#define CS35L41_DEVID 0x00000000 +#define CS35L41_REVID 0x00000004 +#define CS35L41_FABID 0x00000008 +#define CS35L41_RELID 0x0000000C +#define CS35L41_OTPID 0x00000010 +#define CS35L41_SFT_RESET 0x00000020 +#define CS35L41_TEST_KEY_CTL 0x00000040 +#define CS35L41_USER_KEY_CTL 0x00000044 +#define CS35L41_OTP_MEM0 0x00000400 +#define CS35L41_OTP_MEM31 0x0000047C +#define CS35L41_OTP_CTRL0 0x00000500 +#define CS35L41_OTP_CTRL1 0x00000504 +#define CS35L41_OTP_CTRL3 0x00000508 +#define CS35L41_OTP_CTRL4 0x0000050C +#define CS35L41_OTP_CTRL5 0x00000510 +#define CS35L41_OTP_CTRL6 0x00000514 +#define CS35L41_OTP_CTRL7 0x00000518 +#define CS35L41_OTP_CTRL8 0x0000051C +#define CS35L41_PWR_CTRL1 0x00002014 +#define CS35L41_PWR_CTRL2 0x00002018 +#define CS35L41_PWR_CTRL3 0x0000201C +#define CS35L41_CTRL_OVRRIDE 0x00002020 +#define CS35L41_AMP_OUT_MUTE 0x00002024 +#define CS35L41_PROTECT_REL_ERR_IGN 0x00002034 +#define CS35L41_GPIO_PAD_CONTROL 0x0000242C +#define CS35L41_JTAG_CONTROL 0x00002438 +#define CS35L41_PLL_CLK_CTRL 0x00002C04 +#define CS35L41_DSP_CLK_CTRL 0x00002C08 +#define CS35L41_GLOBAL_CLK_CTRL 0x00002C0C +#define CS35L41_DATA_FS_SEL 0x00002C10 +#define CS35L41_TST_FS_MON0 0x00002D10 +#define CS35L41_MDSYNC_EN 0x00003400 +#define CS35L41_MDSYNC_TX_ID 0x00003408 +#define CS35L41_MDSYNC_PWR_CTRL 0x0000340C +#define CS35L41_MDSYNC_DATA_TX 0x00003410 +#define CS35L41_MDSYNC_TX_STATUS 0x00003414 +#define CS35L41_MDSYNC_DATA_RX 0x0000341C +#define CS35L41_MDSYNC_RX_STATUS 0x00003420 +#define CS35L41_MDSYNC_ERR_STATUS 0x00003424 +#define CS35L41_MDSYNC_SYNC_PTE2 0x00003528 +#define CS35L41_MDSYNC_SYNC_PTE3 0x0000352C +#define CS35L41_MDSYNC_SYNC_MSM_STATUS 0x0000353C +#define CS35L41_BSTCVRT_VCTRL1 0x00003800 +#define CS35L41_BSTCVRT_VCTRL2 0x00003804 +#define CS35L41_BSTCVRT_PEAK_CUR 0x00003808 +#define CS35L41_BSTCVRT_SFT_RAMP 0x0000380C +#define CS35L41_BSTCVRT_COEFF 0x00003810 +#define CS35L41_BSTCVRT_SLOPE_LBST 0x00003814 +#define CS35L41_BSTCVRT_SW_FREQ 0x00003818 +#define CS35L41_BSTCVRT_DCM_CTRL 0x0000381C +#define CS35L41_BSTCVRT_DCM_MODE_FORCE 0x00003820 +#define CS35L41_BSTCVRT_OVERVOLT_CTRL 0x00003830 +#define CS35L41_VI_VOL_POL 0x00004000 +#define CS35L41_VIMON_SPKMON_RESYNC 0x00004100 +#define CS35L41_DTEMP_WARN_THLD 0x00004220 +#define CS35L41_DTEMP_CFG 0x00004224 +#define CS35L41_DTEMP_EN 0x00004308 +#define CS35L41_VPVBST_FS_SEL 0x00004400 +#define CS35L41_SP_ENABLES 0x00004800 +#define CS35L41_SP_RATE_CTRL 0x00004804 +#define CS35L41_SP_FORMAT 0x00004808 +#define CS35L41_SP_HIZ_CTRL 0x0000480C +#define CS35L41_SP_FRAME_TX_SLOT 0x00004810 +#define CS35L41_SP_FRAME_RX_SLOT 0x00004820 +#define CS35L41_SP_TX_WL 0x00004830 +#define CS35L41_SP_RX_WL 0x00004840 +#define CS35L41_ASP_CONTROL4 0x00004854 +#define CS35L41_DAC_PCM1_SRC 0x00004C00 +#define CS35L41_ASP_TX1_SRC 0x00004C20 +#define CS35L41_ASP_TX2_SRC 0x00004C24 +#define CS35L41_ASP_TX3_SRC 0x00004C28 +#define CS35L41_ASP_TX4_SRC 0x00004C2C +#define CS35L41_DSP1_RX1_SRC 0x00004C40 +#define CS35L41_DSP1_RX2_SRC 0x00004C44 +#define CS35L41_DSP1_RX3_SRC 0x00004C48 +#define CS35L41_DSP1_RX4_SRC 0x00004C4C +#define CS35L41_DSP1_RX5_SRC 0x00004C50 +#define CS35L41_DSP1_RX6_SRC 0x00004C54 +#define CS35L41_DSP1_RX7_SRC 0x00004C58 +#define CS35L41_DSP1_RX8_SRC 0x00004C5C +#define CS35L41_NGATE1_SRC 0x00004C60 +#define CS35L41_NGATE2_SRC 0x00004C64 +#define CS35L41_AMP_DIG_VOL_CTRL 0x00006000 +#define CS35L41_VPBR_CFG 0x00006404 +#define CS35L41_VBBR_CFG 0x00006408 +#define CS35L41_VPBR_STATUS 0x0000640C +#define CS35L41_VBBR_STATUS 0x00006410 +#define CS35L41_OVERTEMP_CFG 0x00006414 +#define CS35L41_AMP_ERR_VOL 0x00006418 +#define CS35L41_VOL_STATUS_TO_DSP 0x00006450 +#define CS35L41_CLASSH_CFG 0x00006800 +#define CS35L41_WKFET_CFG 0x00006804 +#define CS35L41_NG_CFG 0x00006808 +#define CS35L41_AMP_GAIN_CTRL 0x00006C04 +#define CS35L41_DAC_MSM_CFG 0x00007400 +#define CS35L41_IRQ1_CFG 0x00010000 +#define CS35L41_IRQ1_STATUS 0x00010004 +#define CS35L41_IRQ1_STATUS1 0x00010010 +#define CS35L41_IRQ1_STATUS2 0x00010014 +#define CS35L41_IRQ1_STATUS3 0x00010018 +#define CS35L41_IRQ1_STATUS4 0x0001001C +#define CS35L41_IRQ1_RAW_STATUS1 0x00010090 +#define CS35L41_IRQ1_RAW_STATUS2 0x00010094 +#define CS35L41_IRQ1_RAW_STATUS3 0x00010098 +#define CS35L41_IRQ1_RAW_STATUS4 0x0001009C +#define CS35L41_IRQ1_MASK1 0x00010110 +#define CS35L41_IRQ1_MASK2 0x00010114 +#define CS35L41_IRQ1_MASK3 0x00010118 +#define CS35L41_IRQ1_MASK4 0x0001011C +#define CS35L41_IRQ1_FRC1 0x00010190 +#define CS35L41_IRQ1_FRC2 0x00010194 +#define CS35L41_IRQ1_FRC3 0x00010198 +#define CS35L41_IRQ1_FRC4 0x0001019C +#define CS35L41_IRQ1_EDGE1 0x00010210 +#define CS35L41_IRQ1_EDGE4 0x0001021C +#define CS35L41_IRQ1_POL1 0x00010290 +#define CS35L41_IRQ1_POL2 0x00010294 +#define CS35L41_IRQ1_POL3 0x00010298 +#define CS35L41_IRQ1_POL4 0x0001029C +#define CS35L41_IRQ1_DB3 0x00010318 +#define CS35L41_IRQ2_CFG 0x00010800 +#define CS35L41_IRQ2_STATUS 0x00010804 +#define CS35L41_IRQ2_STATUS1 0x00010810 +#define CS35L41_IRQ2_STATUS2 0x00010814 +#define CS35L41_IRQ2_STATUS3 0x00010818 +#define CS35L41_IRQ2_STATUS4 0x0001081C +#define CS35L41_IRQ2_RAW_STATUS1 0x00010890 +#define CS35L41_IRQ2_RAW_STATUS2 0x00010894 +#define CS35L41_IRQ2_RAW_STATUS3 0x00010898 +#define CS35L41_IRQ2_RAW_STATUS4 0x0001089C +#define CS35L41_IRQ2_MASK1 0x00010910 +#define CS35L41_IRQ2_MASK2 0x00010914 +#define CS35L41_IRQ2_MASK3 0x00010918 +#define CS35L41_IRQ2_MASK4 0x0001091C +#define CS35L41_IRQ2_FRC1 0x00010990 +#define CS35L41_IRQ2_FRC2 0x00010994 +#define CS35L41_IRQ2_FRC3 0x00010998 +#define CS35L41_IRQ2_FRC4 0x0001099C +#define CS35L41_IRQ2_EDGE1 0x00010A10 +#define CS35L41_IRQ2_EDGE4 0x00010A1C +#define CS35L41_IRQ2_POL1 0x00010A90 +#define CS35L41_IRQ2_POL2 0x00010A94 +#define CS35L41_IRQ2_POL3 0x00010A98 +#define CS35L41_IRQ2_POL4 0x00010A9C +#define CS35L41_IRQ2_DB3 0x00010B18 +#define CS35L41_GPIO_STATUS1 0x00011000 +#define CS35L41_GPIO1_CTRL1 0x00011008 +#define CS35L41_GPIO2_CTRL1 0x0001100C +#define CS35L41_MIXER_NGATE_CFG 0x00012000 +#define CS35L41_MIXER_NGATE_CH1_CFG 0x00012004 +#define CS35L41_MIXER_NGATE_CH2_CFG 0x00012008 +#define CS35L41_DSP_MBOX_1 0x00013000 +#define CS35L41_DSP_MBOX_2 0x00013004 +#define CS35L41_DSP_MBOX_3 0x00013008 +#define CS35L41_DSP_MBOX_4 0x0001300C +#define CS35L41_DSP_MBOX_5 0x00013010 +#define CS35L41_DSP_MBOX_6 0x00013014 +#define CS35L41_DSP_MBOX_7 0x00013018 +#define CS35L41_DSP_MBOX_8 0x0001301C +#define CS35L41_DSP_VIRT1_MBOX_1 0x00013020 +#define CS35L41_DSP_VIRT1_MBOX_2 0x00013024 +#define CS35L41_DSP_VIRT1_MBOX_3 0x00013028 +#define CS35L41_DSP_VIRT1_MBOX_4 0x0001302C +#define CS35L41_DSP_VIRT1_MBOX_5 0x00013030 +#define CS35L41_DSP_VIRT1_MBOX_6 0x00013034 +#define CS35L41_DSP_VIRT1_MBOX_7 0x00013038 +#define CS35L41_DSP_VIRT1_MBOX_8 0x0001303C +#define CS35L41_DSP_VIRT2_MBOX_1 0x00013040 +#define CS35L41_DSP_VIRT2_MBOX_2 0x00013044 +#define CS35L41_DSP_VIRT2_MBOX_3 0x00013048 +#define CS35L41_DSP_VIRT2_MBOX_4 0x0001304C +#define CS35L41_DSP_VIRT2_MBOX_5 0x00013050 +#define CS35L41_DSP_VIRT2_MBOX_6 0x00013054 +#define CS35L41_DSP_VIRT2_MBOX_7 0x00013058 +#define CS35L41_DSP_VIRT2_MBOX_8 0x0001305C +#define CS35L41_CLOCK_DETECT_1 0x00014000 +#define CS35L41_TIMER1_CONTROL 0x00015000 +#define CS35L41_TIMER1_COUNT_PRESET 0x00015004 +#define CS35L41_TIMER1_START_STOP 0x0001500C +#define CS35L41_TIMER1_STATUS 0x00015010 +#define CS35L41_TIMER1_COUNT_READBACK 0x00015014 +#define CS35L41_TIMER1_DSP_CLK_CFG 0x00015018 +#define CS35L41_TIMER1_DSP_CLK_STATUS 0x0001501C +#define CS35L41_TIMER2_CONTROL 0x00015100 +#define CS35L41_TIMER2_COUNT_PRESET 0x00015104 +#define CS35L41_TIMER2_START_STOP 0x0001510C +#define CS35L41_TIMER2_STATUS 0x00015110 +#define CS35L41_TIMER2_COUNT_READBACK 0x00015114 +#define CS35L41_TIMER2_DSP_CLK_CFG 0x00015118 +#define CS35L41_TIMER2_DSP_CLK_STATUS 0x0001511C +#define CS35L41_DFT_JTAG_CONTROL 0x00016000 +#define CS35L41_DIE_STS1 0x00017040 +#define CS35L41_DIE_STS2 0x00017044 +#define CS35L41_TEMP_CAL1 0x00017048 +#define CS35L41_TEMP_CAL2 0x0001704C +#define CS35L41_DSP1_XMEM_PACK_0 0x02000000 +#define CS35L41_DSP1_XMEM_PACK_3068 0x02002FF0 +#define CS35L41_DSP1_XMEM_UNPACK32_0 0x02400000 +#define CS35L41_DSP1_XMEM_UNPACK32_2046 0x02401FF8 +#define CS35L41_DSP1_TIMESTAMP_COUNT 0x025C0800 +#define CS35L41_DSP1_SYS_ID 0x025E0000 +#define CS35L41_DSP1_SYS_VERSION 0x025E0004 +#define CS35L41_DSP1_SYS_CORE_ID 0x025E0008 +#define CS35L41_DSP1_SYS_AHB_ADDR 0x025E000C +#define CS35L41_DSP1_SYS_XSRAM_SIZE 0x025E0010 +#define CS35L41_DSP1_SYS_YSRAM_SIZE 0x025E0018 +#define CS35L41_DSP1_SYS_PSRAM_SIZE 0x025E0020 +#define CS35L41_DSP1_SYS_PM_BOOT_SIZE 0x025E0028 +#define CS35L41_DSP1_SYS_FEATURES 0x025E002C +#define CS35L41_DSP1_SYS_FIR_FILTERS 0x025E0030 +#define CS35L41_DSP1_SYS_LMS_FILTERS 0x025E0034 +#define CS35L41_DSP1_SYS_XM_BANK_SIZE 0x025E0038 +#define CS35L41_DSP1_SYS_YM_BANK_SIZE 0x025E003C +#define CS35L41_DSP1_SYS_PM_BANK_SIZE 0x025E0040 +#define CS35L41_DSP1_AHBM_WIN0_CTRL0 0x025E2000 +#define CS35L41_DSP1_AHBM_WIN0_CTRL1 0x025E2004 +#define CS35L41_DSP1_AHBM_WIN1_CTRL0 0x025E2008 +#define CS35L41_DSP1_AHBM_WIN1_CTRL1 0x025E200C +#define CS35L41_DSP1_AHBM_WIN2_CTRL0 0x025E2010 +#define CS35L41_DSP1_AHBM_WIN2_CTRL1 0x025E2014 +#define CS35L41_DSP1_AHBM_WIN3_CTRL0 0x025E2018 +#define CS35L41_DSP1_AHBM_WIN3_CTRL1 0x025E201C +#define CS35L41_DSP1_AHBM_WIN4_CTRL0 0x025E2020 +#define CS35L41_DSP1_AHBM_WIN4_CTRL1 0x025E2024 +#define CS35L41_DSP1_AHBM_WIN5_CTRL0 0x025E2028 +#define CS35L41_DSP1_AHBM_WIN5_CTRL1 0x025E202C +#define CS35L41_DSP1_AHBM_WIN6_CTRL0 0x025E2030 +#define CS35L41_DSP1_AHBM_WIN6_CTRL1 0x025E2034 +#define CS35L41_DSP1_AHBM_WIN7_CTRL0 0x025E2038 +#define CS35L41_DSP1_AHBM_WIN7_CTRL1 0x025E203C +#define CS35L41_DSP1_AHBM_WIN_DBG_CTRL0 0x025E2040 +#define CS35L41_DSP1_AHBM_WIN_DBG_CTRL1 0x025E2044 +#define CS35L41_DSP1_XMEM_UNPACK24_0 0x02800000 +#define CS35L41_DSP1_XMEM_UNPACK24_4093 0x02803FF4 +#define CS35L41_DSP1_CTRL_BASE 0x02B80000 +#define CS35L41_DSP1_CORE_SOFT_RESET 0x02B80010 +#define CS35L41_DSP1_DEBUG 0x02B80040 +#define CS35L41_DSP1_TIMER_CTRL 0x02B80048 +#define CS35L41_DSP1_STREAM_ARB_CTRL 0x02B80050 +#define CS35L41_DSP1_RX1_RATE 0x02B80080 +#define CS35L41_DSP1_RX2_RATE 0x02B80088 +#define CS35L41_DSP1_RX3_RATE 0x02B80090 +#define CS35L41_DSP1_RX4_RATE 0x02B80098 +#define CS35L41_DSP1_RX5_RATE 0x02B800A0 +#define CS35L41_DSP1_RX6_RATE 0x02B800A8 +#define CS35L41_DSP1_RX7_RATE 0x02B800B0 +#define CS35L41_DSP1_RX8_RATE 0x02B800B8 +#define CS35L41_DSP1_TX1_RATE 0x02B80280 +#define CS35L41_DSP1_TX2_RATE 0x02B80288 +#define CS35L41_DSP1_TX3_RATE 0x02B80290 +#define CS35L41_DSP1_TX4_RATE 0x02B80298 +#define CS35L41_DSP1_TX5_RATE 0x02B802A0 +#define CS35L41_DSP1_TX6_RATE 0x02B802A8 +#define CS35L41_DSP1_TX7_RATE 0x02B802B0 +#define CS35L41_DSP1_TX8_RATE 0x02B802B8 +#define CS35L41_DSP1_NMI_CTRL1 0x02B80480 +#define CS35L41_DSP1_NMI_CTRL2 0x02B80488 +#define CS35L41_DSP1_NMI_CTRL3 0x02B80490 +#define CS35L41_DSP1_NMI_CTRL4 0x02B80498 +#define CS35L41_DSP1_NMI_CTRL5 0x02B804A0 +#define CS35L41_DSP1_NMI_CTRL6 0x02B804A8 +#define CS35L41_DSP1_NMI_CTRL7 0x02B804B0 +#define CS35L41_DSP1_NMI_CTRL8 0x02B804B8 +#define CS35L41_DSP1_RESUME_CTRL 0x02B80500 +#define CS35L41_DSP1_IRQ1_CTRL 0x02B80508 +#define CS35L41_DSP1_IRQ2_CTRL 0x02B80510 +#define CS35L41_DSP1_IRQ3_CTRL 0x02B80518 +#define CS35L41_DSP1_IRQ4_CTRL 0x02B80520 +#define CS35L41_DSP1_IRQ5_CTRL 0x02B80528 +#define CS35L41_DSP1_IRQ6_CTRL 0x02B80530 +#define CS35L41_DSP1_IRQ7_CTRL 0x02B80538 +#define CS35L41_DSP1_IRQ8_CTRL 0x02B80540 +#define CS35L41_DSP1_IRQ9_CTRL 0x02B80548 +#define CS35L41_DSP1_IRQ10_CTRL 0x02B80550 +#define CS35L41_DSP1_IRQ11_CTRL 0x02B80558 +#define CS35L41_DSP1_IRQ12_CTRL 0x02B80560 +#define CS35L41_DSP1_IRQ13_CTRL 0x02B80568 +#define CS35L41_DSP1_IRQ14_CTRL 0x02B80570 +#define CS35L41_DSP1_IRQ15_CTRL 0x02B80578 +#define CS35L41_DSP1_IRQ16_CTRL 0x02B80580 +#define CS35L41_DSP1_IRQ17_CTRL 0x02B80588 +#define CS35L41_DSP1_IRQ18_CTRL 0x02B80590 +#define CS35L41_DSP1_IRQ19_CTRL 0x02B80598 +#define CS35L41_DSP1_IRQ20_CTRL 0x02B805A0 +#define CS35L41_DSP1_IRQ21_CTRL 0x02B805A8 +#define CS35L41_DSP1_IRQ22_CTRL 0x02B805B0 +#define CS35L41_DSP1_IRQ23_CTRL 0x02B805B8 +#define CS35L41_DSP1_SCRATCH1 0x02B805C0 +#define CS35L41_DSP1_SCRATCH2 0x02B805C8 +#define CS35L41_DSP1_SCRATCH3 0x02B805D0 +#define CS35L41_DSP1_SCRATCH4 0x02B805D8 +#define CS35L41_DSP1_CCM_CORE_CTRL 0x02BC1000 +#define CS35L41_DSP1_CCM_CLK_OVERRIDE 0x02BC1008 +#define CS35L41_DSP1_XM_MSTR_EN 0x02BC2000 +#define CS35L41_DSP1_XM_CORE_PRI 0x02BC2008 +#define CS35L41_DSP1_XM_AHB_PACK_PL_PRI 0x02BC2010 +#define CS35L41_DSP1_XM_AHB_UP_PL_PRI 0x02BC2018 +#define CS35L41_DSP1_XM_ACCEL_PL0_PRI 0x02BC2020 +#define CS35L41_DSP1_XM_NPL0_PRI 0x02BC2078 +#define CS35L41_DSP1_YM_MSTR_EN 0x02BC20C0 +#define CS35L41_DSP1_YM_CORE_PRI 0x02BC20C8 +#define CS35L41_DSP1_YM_AHB_PACK_PL_PRI 0x02BC20D0 +#define CS35L41_DSP1_YM_AHB_UP_PL_PRI 0x02BC20D8 +#define CS35L41_DSP1_YM_ACCEL_PL0_PRI 0x02BC20E0 +#define CS35L41_DSP1_YM_NPL0_PRI 0x02BC2138 +#define CS35L41_DSP1_PM_MSTR_EN 0x02BC2180 +#define CS35L41_DSP1_PM_PATCH0_ADDR 0x02BC2188 +#define CS35L41_DSP1_PM_PATCH0_EN 0x02BC218C +#define CS35L41_DSP1_PM_PATCH0_DATA_LO 0x02BC2190 +#define CS35L41_DSP1_PM_PATCH0_DATA_HI 0x02BC2194 +#define CS35L41_DSP1_PM_PATCH1_ADDR 0x02BC2198 +#define CS35L41_DSP1_PM_PATCH1_EN 0x02BC219C +#define CS35L41_DSP1_PM_PATCH1_DATA_LO 0x02BC21A0 +#define CS35L41_DSP1_PM_PATCH1_DATA_HI 0x02BC21A4 +#define CS35L41_DSP1_PM_PATCH2_ADDR 0x02BC21A8 +#define CS35L41_DSP1_PM_PATCH2_EN 0x02BC21AC +#define CS35L41_DSP1_PM_PATCH2_DATA_LO 0x02BC21B0 +#define CS35L41_DSP1_PM_PATCH2_DATA_HI 0x02BC21B4 +#define CS35L41_DSP1_PM_PATCH3_ADDR 0x02BC21B8 +#define CS35L41_DSP1_PM_PATCH3_EN 0x02BC21BC +#define CS35L41_DSP1_PM_PATCH3_DATA_LO 0x02BC21C0 +#define CS35L41_DSP1_PM_PATCH3_DATA_HI 0x02BC21C4 +#define CS35L41_DSP1_PM_PATCH4_ADDR 0x02BC21C8 +#define CS35L41_DSP1_PM_PATCH4_EN 0x02BC21CC +#define CS35L41_DSP1_PM_PATCH4_DATA_LO 0x02BC21D0 +#define CS35L41_DSP1_PM_PATCH4_DATA_HI 0x02BC21D4 +#define CS35L41_DSP1_PM_PATCH5_ADDR 0x02BC21D8 +#define CS35L41_DSP1_PM_PATCH5_EN 0x02BC21DC +#define CS35L41_DSP1_PM_PATCH5_DATA_LO 0x02BC21E0 +#define CS35L41_DSP1_PM_PATCH5_DATA_HI 0x02BC21E4 +#define CS35L41_DSP1_PM_PATCH6_ADDR 0x02BC21E8 +#define CS35L41_DSP1_PM_PATCH6_EN 0x02BC21EC +#define CS35L41_DSP1_PM_PATCH6_DATA_LO 0x02BC21F0 +#define CS35L41_DSP1_PM_PATCH6_DATA_HI 0x02BC21F4 +#define CS35L41_DSP1_PM_PATCH7_ADDR 0x02BC21F8 +#define CS35L41_DSP1_PM_PATCH7_EN 0x02BC21FC +#define CS35L41_DSP1_PM_PATCH7_DATA_LO 0x02BC2200 +#define CS35L41_DSP1_PM_PATCH7_DATA_HI 0x02BC2204 +#define CS35L41_DSP1_MPU_XM_ACCESS0 0x02BC3000 +#define CS35L41_DSP1_MPU_YM_ACCESS0 0x02BC3004 +#define CS35L41_DSP1_MPU_WNDW_ACCESS0 0x02BC3008 +#define CS35L41_DSP1_MPU_XREG_ACCESS0 0x02BC300C +#define CS35L41_DSP1_MPU_YREG_ACCESS0 0x02BC3014 +#define CS35L41_DSP1_MPU_XM_ACCESS1 0x02BC3018 +#define CS35L41_DSP1_MPU_YM_ACCESS1 0x02BC301C +#define CS35L41_DSP1_MPU_WNDW_ACCESS1 0x02BC3020 +#define CS35L41_DSP1_MPU_XREG_ACCESS1 0x02BC3024 +#define CS35L41_DSP1_MPU_YREG_ACCESS1 0x02BC302C +#define CS35L41_DSP1_MPU_XM_ACCESS2 0x02BC3030 +#define CS35L41_DSP1_MPU_YM_ACCESS2 0x02BC3034 +#define CS35L41_DSP1_MPU_WNDW_ACCESS2 0x02BC3038 +#define CS35L41_DSP1_MPU_XREG_ACCESS2 0x02BC303C +#define CS35L41_DSP1_MPU_YREG_ACCESS2 0x02BC3044 +#define CS35L41_DSP1_MPU_XM_ACCESS3 0x02BC3048 +#define CS35L41_DSP1_MPU_YM_ACCESS3 0x02BC304C +#define CS35L41_DSP1_MPU_WNDW_ACCESS3 0x02BC3050 +#define CS35L41_DSP1_MPU_XREG_ACCESS3 0x02BC3054 +#define CS35L41_DSP1_MPU_YREG_ACCESS3 0x02BC305C +#define CS35L41_DSP1_MPU_XM_VIO_ADDR 0x02BC3100 +#define CS35L41_DSP1_MPU_XM_VIO_STATUS 0x02BC3104 +#define CS35L41_DSP1_MPU_YM_VIO_ADDR 0x02BC3108 +#define CS35L41_DSP1_MPU_YM_VIO_STATUS 0x02BC310C +#define CS35L41_DSP1_MPU_PM_VIO_ADDR 0x02BC3110 +#define CS35L41_DSP1_MPU_PM_VIO_STATUS 0x02BC3114 +#define CS35L41_DSP1_MPU_LOCK_CONFIG 0x02BC3140 +#define CS35L41_DSP1_MPU_WDT_RST_CTRL 0x02BC3180 +#define CS35L41_DSP1_STRMARB_MSTR0_CFG0 0x02BC5000 +#define CS35L41_DSP1_STRMARB_MSTR0_CFG1 0x02BC5004 +#define CS35L41_DSP1_STRMARB_MSTR0_CFG2 0x02BC5008 +#define CS35L41_DSP1_STRMARB_MSTR1_CFG0 0x02BC5010 +#define CS35L41_DSP1_STRMARB_MSTR1_CFG1 0x02BC5014 +#define CS35L41_DSP1_STRMARB_MSTR1_CFG2 0x02BC5018 +#define CS35L41_DSP1_STRMARB_MSTR2_CFG0 0x02BC5020 +#define CS35L41_DSP1_STRMARB_MSTR2_CFG1 0x02BC5024 +#define CS35L41_DSP1_STRMARB_MSTR2_CFG2 0x02BC5028 +#define CS35L41_DSP1_STRMARB_MSTR3_CFG0 0x02BC5030 +#define CS35L41_DSP1_STRMARB_MSTR3_CFG1 0x02BC5034 +#define CS35L41_DSP1_STRMARB_MSTR3_CFG2 0x02BC5038 +#define CS35L41_DSP1_STRMARB_MSTR4_CFG0 0x02BC5040 +#define CS35L41_DSP1_STRMARB_MSTR4_CFG1 0x02BC5044 +#define CS35L41_DSP1_STRMARB_MSTR4_CFG2 0x02BC5048 +#define CS35L41_DSP1_STRMARB_MSTR5_CFG0 0x02BC5050 +#define CS35L41_DSP1_STRMARB_MSTR5_CFG1 0x02BC5054 +#define CS35L41_DSP1_STRMARB_MSTR5_CFG2 0x02BC5058 +#define CS35L41_DSP1_STRMARB_MSTR6_CFG0 0x02BC5060 +#define CS35L41_DSP1_STRMARB_MSTR6_CFG1 0x02BC5064 +#define CS35L41_DSP1_STRMARB_MSTR6_CFG2 0x02BC5068 +#define CS35L41_DSP1_STRMARB_MSTR7_CFG0 0x02BC5070 +#define CS35L41_DSP1_STRMARB_MSTR7_CFG1 0x02BC5074 +#define CS35L41_DSP1_STRMARB_MSTR7_CFG2 0x02BC5078 +#define CS35L41_DSP1_STRMARB_TX0_CFG0 0x02BC5200 +#define CS35L41_DSP1_STRMARB_TX0_CFG1 0x02BC5204 +#define CS35L41_DSP1_STRMARB_TX1_CFG0 0x02BC5208 +#define CS35L41_DSP1_STRMARB_TX1_CFG1 0x02BC520C +#define CS35L41_DSP1_STRMARB_TX2_CFG0 0x02BC5210 +#define CS35L41_DSP1_STRMARB_TX2_CFG1 0x02BC5214 +#define CS35L41_DSP1_STRMARB_TX3_CFG0 0x02BC5218 +#define CS35L41_DSP1_STRMARB_TX3_CFG1 0x02BC521C +#define CS35L41_DSP1_STRMARB_TX4_CFG0 0x02BC5220 +#define CS35L41_DSP1_STRMARB_TX4_CFG1 0x02BC5224 +#define CS35L41_DSP1_STRMARB_TX5_CFG0 0x02BC5228 +#define CS35L41_DSP1_STRMARB_TX5_CFG1 0x02BC522C +#define CS35L41_DSP1_STRMARB_TX6_CFG0 0x02BC5230 +#define CS35L41_DSP1_STRMARB_TX6_CFG1 0x02BC5234 +#define CS35L41_DSP1_STRMARB_TX7_CFG0 0x02BC5238 +#define CS35L41_DSP1_STRMARB_TX7_CFG1 0x02BC523C +#define CS35L41_DSP1_STRMARB_RX0_CFG0 0x02BC5400 +#define CS35L41_DSP1_STRMARB_RX0_CFG1 0x02BC5404 +#define CS35L41_DSP1_STRMARB_RX1_CFG0 0x02BC5408 +#define CS35L41_DSP1_STRMARB_RX1_CFG1 0x02BC540C +#define CS35L41_DSP1_STRMARB_RX2_CFG0 0x02BC5410 +#define CS35L41_DSP1_STRMARB_RX2_CFG1 0x02BC5414 +#define CS35L41_DSP1_STRMARB_RX3_CFG0 0x02BC5418 +#define CS35L41_DSP1_STRMARB_RX3_CFG1 0x02BC541C +#define CS35L41_DSP1_STRMARB_RX4_CFG0 0x02BC5420 +#define CS35L41_DSP1_STRMARB_RX4_CFG1 0x02BC5424 +#define CS35L41_DSP1_STRMARB_RX5_CFG0 0x02BC5428 +#define CS35L41_DSP1_STRMARB_RX5_CFG1 0x02BC542C +#define CS35L41_DSP1_STRMARB_RX6_CFG0 0x02BC5430 +#define CS35L41_DSP1_STRMARB_RX6_CFG1 0x02BC5434 +#define CS35L41_DSP1_STRMARB_RX7_CFG0 0x02BC5438 +#define CS35L41_DSP1_STRMARB_RX7_CFG1 0x02BC543C +#define CS35L41_DSP1_STRMARB_IRQ0_CFG0 0x02BC5600 +#define CS35L41_DSP1_STRMARB_IRQ0_CFG1 0x02BC5604 +#define CS35L41_DSP1_STRMARB_IRQ0_CFG2 0x02BC5608 +#define CS35L41_DSP1_STRMARB_IRQ1_CFG0 0x02BC5610 +#define CS35L41_DSP1_STRMARB_IRQ1_CFG1 0x02BC5614 +#define CS35L41_DSP1_STRMARB_IRQ1_CFG2 0x02BC5618 +#define CS35L41_DSP1_STRMARB_IRQ2_CFG0 0x02BC5620 +#define CS35L41_DSP1_STRMARB_IRQ2_CFG1 0x02BC5624 +#define CS35L41_DSP1_STRMARB_IRQ2_CFG2 0x02BC5628 +#define CS35L41_DSP1_STRMARB_IRQ3_CFG0 0x02BC5630 +#define CS35L41_DSP1_STRMARB_IRQ3_CFG1 0x02BC5634 +#define CS35L41_DSP1_STRMARB_IRQ3_CFG2 0x02BC5638 +#define CS35L41_DSP1_STRMARB_IRQ4_CFG0 0x02BC5640 +#define CS35L41_DSP1_STRMARB_IRQ4_CFG1 0x02BC5644 +#define CS35L41_DSP1_STRMARB_IRQ4_CFG2 0x02BC5648 +#define CS35L41_DSP1_STRMARB_IRQ5_CFG0 0x02BC5650 +#define CS35L41_DSP1_STRMARB_IRQ5_CFG1 0x02BC5654 +#define CS35L41_DSP1_STRMARB_IRQ5_CFG2 0x02BC5658 +#define CS35L41_DSP1_STRMARB_IRQ6_CFG0 0x02BC5660 +#define CS35L41_DSP1_STRMARB_IRQ6_CFG1 0x02BC5664 +#define CS35L41_DSP1_STRMARB_IRQ6_CFG2 0x02BC5668 +#define CS35L41_DSP1_STRMARB_IRQ7_CFG0 0x02BC5670 +#define CS35L41_DSP1_STRMARB_IRQ7_CFG1 0x02BC5674 +#define CS35L41_DSP1_STRMARB_IRQ7_CFG2 0x02BC5678 +#define CS35L41_DSP1_STRMARB_RESYNC_MSK 0x02BC5A00 +#define CS35L41_DSP1_STRMARB_ERR_STATUS 0x02BC5A08 +#define CS35L41_DSP1_INTPCTL_RES_STATIC 0x02BC6000 +#define CS35L41_DSP1_INTPCTL_RES_DYN 0x02BC6004 +#define CS35L41_DSP1_INTPCTL_NMI_CTRL 0x02BC6008 +#define CS35L41_DSP1_INTPCTL_IRQ_INV 0x02BC6010 +#define CS35L41_DSP1_INTPCTL_IRQ_MODE 0x02BC6014 +#define CS35L41_DSP1_INTPCTL_IRQ_EN 0x02BC6018 +#define CS35L41_DSP1_INTPCTL_IRQ_MSK 0x02BC601C +#define CS35L41_DSP1_INTPCTL_IRQ_FLUSH 0x02BC6020 +#define CS35L41_DSP1_INTPCTL_IRQ_MSKCLR 0x02BC6024 +#define CS35L41_DSP1_INTPCTL_IRQ_FRC 0x02BC6028 +#define CS35L41_DSP1_INTPCTL_IRQ_MSKSET 0x02BC602C +#define CS35L41_DSP1_INTPCTL_IRQ_ERR 0x02BC6030 +#define CS35L41_DSP1_INTPCTL_IRQ_PEND 0x02BC6034 +#define CS35L41_DSP1_INTPCTL_IRQ_GEN 0x02BC6038 +#define CS35L41_DSP1_INTPCTL_TESTBITS 0x02BC6040 +#define CS35L41_DSP1_WDT_CONTROL 0x02BC7000 +#define CS35L41_DSP1_WDT_STATUS 0x02BC7008 +#define CS35L41_DSP1_YMEM_PACK_0 0x02C00000 +#define CS35L41_DSP1_YMEM_PACK_1532 0x02C017F0 +#define CS35L41_DSP1_YMEM_UNPACK32_0 0x03000000 +#define CS35L41_DSP1_YMEM_UNPACK32_1022 0x03000FF8 +#define CS35L41_DSP1_YMEM_UNPACK24_0 0x03400000 +#define CS35L41_DSP1_YMEM_UNPACK24_2045 0x03401FF4 +#define CS35L41_DSP1_PMEM_0 0x03800000 +#define CS35L41_DSP1_PMEM_5114 0x03804FE8 + +/*test regs for emulation bringup*/ +#define CS35L41_PLL_OVR 0x00003018 +#define CS35L41_BST_TEST_DUTY 0x00003900 +#define CS35L41_DIGPWM_IOCTRL 0x0000706C + +/*registers populated by OTP*/ +#define CS35L41_OTP_TRIM_1 0x0000208c +#define CS35L41_OTP_TRIM_2 0x00002090 +#define CS35L41_OTP_TRIM_3 0x00003010 +#define CS35L41_OTP_TRIM_4 0x0000300C +#define CS35L41_OTP_TRIM_5 0x0000394C +#define CS35L41_OTP_TRIM_6 0x00003950 +#define CS35L41_OTP_TRIM_7 0x00003954 +#define CS35L41_OTP_TRIM_8 0x00003958 +#define CS35L41_OTP_TRIM_9 0x0000395C +#define CS35L41_OTP_TRIM_10 0x0000416C +#define CS35L41_OTP_TRIM_11 0x00004160 +#define CS35L41_OTP_TRIM_12 0x00004170 +#define CS35L41_OTP_TRIM_13 0x00004360 +#define CS35L41_OTP_TRIM_14 0x00004448 +#define CS35L41_OTP_TRIM_15 0x0000444C +#define CS35L41_OTP_TRIM_16 0x00006E30 +#define CS35L41_OTP_TRIM_17 0x00006E34 +#define CS35L41_OTP_TRIM_18 0x00006E38 +#define CS35L41_OTP_TRIM_19 0x00006E3C +#define CS35L41_OTP_TRIM_20 0x00006E40 +#define CS35L41_OTP_TRIM_21 0x00006E44 +#define CS35L41_OTP_TRIM_22 0x00006E48 +#define CS35L41_OTP_TRIM_23 0x00006E4C +#define CS35L41_OTP_TRIM_24 0x00006E50 +#define CS35L41_OTP_TRIM_25 0x00006E54 +#define CS35L41_OTP_TRIM_26 0x00006E58 +#define CS35L41_OTP_TRIM_27 0x00006E5C +#define CS35L41_OTP_TRIM_28 0x00006E60 +#define CS35L41_OTP_TRIM_29 0x00006E64 +#define CS35L41_OTP_TRIM_30 0x00007418 +#define CS35L41_OTP_TRIM_31 0x0000741C +#define CS35L41_OTP_TRIM_32 0x00007434 +#define CS35L41_OTP_TRIM_33 0x00007068 +#define CS35L41_OTP_TRIM_34 0x0000410C +#define CS35L41_OTP_TRIM_35 0x0000400C +#define CS35L41_OTP_TRIM_36 0x00002030 + +#define CS35L41_MAX_CACHE_REG 36 +#define CS35L41_OTP_SIZE_WORDS 32 +#define CS35L41_NUM_OTP_ELEM 100 +#define CS35L41_NUM_OTP_MAPS 5 + +#define CS35L41_VALID_PDATA 0x80000000 +#define CS35L41_NUM_SUPPLIES 2 + +#define CS35L41_SCLK_MSTR_MASK 0x10 +#define CS35L41_SCLK_MSTR_SHIFT 4 +#define CS35L41_LRCLK_MSTR_MASK 0x01 +#define CS35L41_LRCLK_MSTR_SHIFT 0 +#define CS35L41_SCLK_INV_MASK 0x40 +#define CS35L41_SCLK_INV_SHIFT 6 +#define CS35L41_LRCLK_INV_MASK 0x04 +#define CS35L41_LRCLK_INV_SHIFT 2 +#define CS35L41_SCLK_FRC_MASK 0x20 +#define CS35L41_SCLK_FRC_SHIFT 5 +#define CS35L41_LRCLK_FRC_MASK 0x02 +#define CS35L41_LRCLK_FRC_SHIFT 1 + +#define CS35L41_AMP_GAIN_PCM_MASK 0x3E0 +#define CS35L41_AMP_GAIN_ZC_MASK 0x0400 +#define CS35L41_AMP_GAIN_ZC_SHIFT 10 + +#define CS35L41_BST_CTL_MASK 0xFF +#define CS35L41_BST_CTL_SEL_MASK 0x03 +#define CS35L41_BST_CTL_SEL_REG 0x00 +#define CS35L41_BST_CTL_SEL_CLASSH 0x01 +#define CS35L41_BST_IPK_MASK 0x7F +#define CS35L41_BST_IPK_SHIFT 0 +#define CS35L41_BST_LIM_MASK 0x4 +#define CS35L41_BST_LIM_SHIFT 2 +#define CS35L41_BST_K1_MASK 0x000000FF +#define CS35L41_BST_K1_SHIFT 0 +#define CS35L41_BST_K2_MASK 0x0000FF00 +#define CS35L41_BST_K2_SHIFT 8 +#define CS35L41_BST_SLOPE_MASK 0x0000FF00 +#define CS35L41_BST_SLOPE_SHIFT 8 +#define CS35L41_BST_LBST_VAL_MASK 0x00000003 +#define CS35L41_BST_LBST_VAL_SHIFT 0 + +#define CS35L41_TEMP_THLD_MASK 0x03 +#define CS35L41_VMON_IMON_VOL_MASK 0x07FF07FF +#define CS35L41_PDM_MODE_MASK 0x01 +#define CS35L41_PDM_MODE_SHIFT 0 + +#define CS35L41_CH_MEM_DEPTH_MASK 0x07 +#define CS35L41_CH_MEM_DEPTH_SHIFT 0 +#define CS35L41_CH_HDRM_CTL_MASK 0x007F0000 +#define CS35L41_CH_HDRM_CTL_SHIFT 16 +#define CS35L41_CH_REL_RATE_MASK 0xFF00 +#define CS35L41_CH_REL_RATE_SHIFT 8 +#define CS35L41_CH_WKFET_DLY_MASK 0x001C +#define CS35L41_CH_WKFET_DLY_SHIFT 2 +#define CS35L41_CH_WKFET_THLD_MASK 0x0F00 +#define CS35L41_CH_WKFET_THLD_SHIFT 8 + +#define CS35L41_HW_NG_SEL_MASK 0x3F00 +#define CS35L41_HW_NG_SEL_SHIFT 8 +#define CS35L41_HW_NG_DLY_MASK 0x0070 +#define CS35L41_HW_NG_DLY_SHIFT 4 +#define CS35L41_HW_NG_THLD_MASK 0x0007 +#define CS35L41_HW_NG_THLD_SHIFT 0 + +#define CS35L41_DSP_NG_ENABLE_MASK 0x00010000 +#define CS35L41_DSP_NG_ENABLE_SHIFT 16 +#define CS35L41_DSP_NG_THLD_MASK 0x7 +#define CS35L41_DSP_NG_THLD_SHIFT 0 +#define CS35L41_DSP_NG_DELAY_MASK 0x0F00 +#define CS35L41_DSP_NG_DELAY_SHIFT 8 + +#define CS35L41_ASP_FMT_MASK 0x0700 +#define CS35L41_ASP_FMT_SHIFT 8 +#define CS35L41_ASP_DOUT_HIZ_MASK 0x03 +#define CS35L41_ASP_DOUT_HIZ_SHIFT 0 +#define CS35L41_ASP_WIDTH_16 0x10 +#define CS35L41_ASP_WIDTH_24 0x18 +#define CS35L41_ASP_WIDTH_32 0x20 +#define CS35L41_ASP_WIDTH_TX_MASK 0xFF0000 +#define CS35L41_ASP_WIDTH_TX_SHIFT 16 +#define CS35L41_ASP_WIDTH_RX_MASK 0xFF000000 +#define CS35L41_ASP_WIDTH_RX_SHIFT 24 +#define CS35L41_ASP_RX1_SLOT_MASK 0x3F +#define CS35L41_ASP_RX1_SLOT_SHIFT 0 +#define CS35L41_ASP_RX2_SLOT_MASK 0x3F00 +#define CS35L41_ASP_RX2_SLOT_SHIFT 8 +#define CS35L41_ASP_RX_WL_MASK 0x3F +#define CS35L41_ASP_TX_WL_MASK 0x3F +#define CS35L41_ASP_RX_WL_SHIFT 0 +#define CS35L41_ASP_TX_WL_SHIFT 0 +#define CS35L41_ASP_SOURCE_MASK 0x7F + +#define CS35L41_INPUT_SRC_ASPRX1 0x08 +#define CS35L41_INPUT_SRC_ASPRX2 0x09 +#define CS35L41_INPUT_SRC_VMON 0x18 +#define CS35L41_INPUT_SRC_IMON 0x19 +#define CS35L41_INPUT_SRC_CLASSH 0x21 +#define CS35L41_INPUT_SRC_VPMON 0x28 +#define CS35L41_INPUT_SRC_VBSTMON 0x29 +#define CS35L41_INPUT_SRC_TEMPMON 0x3A +#define CS35L41_INPUT_SRC_RSVD 0x3B +#define CS35L41_INPUT_DSP_TX1 0x32 +#define CS35L41_INPUT_DSP_TX2 0x33 + +#define CS35L41_PLL_CLK_SEL_MASK 0x07 +#define CS35L41_PLL_CLK_SEL_SHIFT 0 +#define CS35L41_PLL_CLK_EN_MASK 0x10 +#define CS35L41_PLL_CLK_EN_SHIFT 4 +#define CS35L41_PLL_OPENLOOP_MASK 0x0800 +#define CS35L41_PLL_OPENLOOP_SHIFT 11 +#define CS35L41_PLLSRC_SCLK 0 +#define CS35L41_PLLSRC_LRCLK 1 +#define CS35L41_PLLSRC_SELF 3 +#define CS35L41_PLLSRC_PDMCLK 4 +#define CS35L41_PLLSRC_MCLK 5 +#define CS35L41_PLLSRC_SWIRE 7 +#define CS35L41_REFCLK_FREQ_MASK 0x7E0 +#define CS35L41_REFCLK_FREQ_SHIFT 5 + +#define CS35L41_GLOBAL_FS_MASK 0x1F +#define CS35L41_GLOBAL_FS_SHIFT 0 + +#define CS35L41_GLOBAL_EN_MASK 0x01 +#define CS35L41_GLOBAL_EN_SHIFT 0 +#define CS35L41_BST_EN_MASK 0x0030 +#define CS35L41_BST_EN_SHIFT 4 +#define CS35L41_BST_EN_DEFAULT 0x2 +#define CS35L41_AMP_EN_SHIFT 0 +#define CS35L41_AMP_EN_MASK 1 + +#define CS35L41_PDN_DONE_MASK 0x00800000 +#define CS35L41_PDN_DONE_SHIFT 23 +#define CS35L41_PUP_DONE_MASK 0x01000000 +#define CS35L41_PUP_DONE_SHIFT 24 + +#define CS35L36_PUP_DONE_IRQ_UNMASK 0x5F +#define CS35L36_PUP_DONE_IRQ_MASK 0xBF + +#define CS35L41_AMP_SHORT_ERR 0x80000000 +#define CS35L41_BST_SHORT_ERR 0x0100 +#define CS35L41_TEMP_WARN 0x8000 +#define CS35L41_TEMP_ERR 0x00020000 +#define CS35L41_BST_OVP_ERR 0x40 +#define CS35L41_BST_DCM_UVP_ERR 0x80 +#define CS35L41_OTP_BOOT_DONE 0x02 +#define CS35L41_PLL_UNLOCK 0x10 +#define CS35L41_OTP_BOOT_ERR 0x80000000 + +#define CS35L41_AMP_SHORT_ERR_RLS 0x02 +#define CS35L41_BST_SHORT_ERR_RLS 0x04 +#define CS35L41_BST_OVP_ERR_RLS 0x08 +#define CS35L41_BST_UVP_ERR_RLS 0x10 +#define CS35L41_TEMP_WARN_ERR_RLS 0x20 +#define CS35L41_TEMP_ERR_RLS 0x40 + +#define CS35L41_INT1_MASK_DEFAULT 0x7FFCFE3F +#define CS35L41_INT1_UNMASK_PUP 0xFEFFFFFF +#define CS35L41_INT1_UNMASK_PDN 0xFF7FFFFF + +#define CS35L41_GPIO_DIR_MASK 0x80000000 +#define CS35L41_GPIO_DIR_SHIFT 31 +#define CS35L41_GPIO1_CTRL_MASK 0x00030000 +#define CS35L41_GPIO1_CTRL_SHIFT 16 +#define CS35L41_GPIO2_CTRL_MASK 0x07000000 +#define CS35L41_GPIO2_CTRL_SHIFT 24 +#define CS35L41_GPIO_CTRL_OPEN_INT 2 +#define CS35L41_GPIO_CTRL_ACTV_LO 4 +#define CS35L41_GPIO_CTRL_ACTV_HI 5 +#define CS35L41_GPIO_POL_MASK 0x1000 +#define CS35L41_GPIO_POL_SHIFT 12 + +#define CS35L41_AMP_INV_PCM_SHIFT 14 +#define CS35L41_AMP_INV_PCM_MASK BIT(CS35L41_AMP_INV_PCM_SHIFT) +#define CS35L41_AMP_PCM_VOL_SHIFT 3 +#define CS35L41_AMP_PCM_VOL_MASK (0x7FF << 3) +#define CS35L41_AMP_PCM_VOL_MUTE 0x4CF + +#define CS35L41_CHIP_ID 0x35a40 +#define CS35L41R_CHIP_ID 0x35b40 +#define CS35L41_MTLREVID_MASK 0x0F +#define CS35L41_REVID_A0 0xA0 +#define CS35L41_REVID_B0 0xB0 +#define CS35L41_REVID_B2 0xB2 + +#define CS35L41_HALO_CORE_RESET 0x00000200 + +#define CS35L41_FS1_WINDOW_MASK 0x000007FF +#define CS35L41_FS2_WINDOW_MASK 0x00FFF800 +#define CS35L41_FS2_WINDOW_SHIFT 12 + +#define CS35L41_SPI_MAX_FREQ 4000000 +#define CS35L41_REGSTRIDE 4 + enum cs35l41_clk_ids { CS35L41_CLKID_SCLK = 0, CS35L41_CLKID_LRCLK = 1, @@ -31,4 +746,22 @@ struct cs35l41_platform_data { struct cs35l41_irq_cfg irq_config2; }; +struct cs35l41_otp_packed_element_t { + u32 reg; + u8 shift; + u8 size; +}; + +struct cs35l41_otp_map_element_t { + u32 id; + u32 num_elements; + const struct cs35l41_otp_packed_element_t *map; + u32 bit_offset; + u32 word_offset; +}; + +extern const struct cs35l41_otp_map_element_t cs35l41_otp_map_map[CS35L41_NUM_OTP_MAPS]; +extern struct regmap_config cs35l41_regmap_i2c; +extern struct regmap_config cs35l41_regmap_spi; + #endif /* __CS35L41_H */ -- cgit v1.2.3 From fe120d4cb6f6cd03007239e7c578b8703fe6d336 Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 17 Dec 2021 11:57:00 +0000 Subject: ASoC: cs35l41: Move cs35l41_otp_unpack to shared code ASoC and HDA will do the same cs35l41_otp_unpack, so move it to shared code Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20211217115708.882525-3-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l41.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index aac3ffb9bc89..6cf3ef02b26a 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -534,7 +534,6 @@ #define CS35L41_MAX_CACHE_REG 36 #define CS35L41_OTP_SIZE_WORDS 32 #define CS35L41_NUM_OTP_ELEM 100 -#define CS35L41_NUM_OTP_MAPS 5 #define CS35L41_VALID_PDATA 0x80000000 #define CS35L41_NUM_SUPPLIES 2 @@ -760,8 +759,9 @@ struct cs35l41_otp_map_element_t { u32 word_offset; }; -extern const struct cs35l41_otp_map_element_t cs35l41_otp_map_map[CS35L41_NUM_OTP_MAPS]; extern struct regmap_config cs35l41_regmap_i2c; extern struct regmap_config cs35l41_regmap_spi; +int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap); + #endif /* __CS35L41_H */ -- cgit v1.2.3 From 8b2278604b6de27329ec7ed82ca696c4751111b6 Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 17 Dec 2021 11:57:02 +0000 Subject: ASoC: cs35l41: Create shared function for errata patches ASoC and HDA systems require the same errata patches, so move it to the shared code using a function the correctly applies the patches by revision Also, move CS35L41_DSP1_CCM_CORE_CTRL write to errata patch function as is required to be written at boot, but not in regmap_register_patch sequence as will affect waking up from hibernation Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20211217115708.882525-5-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l41.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index 6cf3ef02b26a..ad2e32a12b8c 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -763,5 +763,6 @@ extern struct regmap_config cs35l41_regmap_i2c; extern struct regmap_config cs35l41_regmap_spi; int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap); +int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsigned int reg_revid); #endif /* __CS35L41_H */ -- cgit v1.2.3 From 3bc3e3da657f17c14df8ae8fab58183407bd7521 Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 17 Dec 2021 11:57:03 +0000 Subject: ASoC: cs35l41: Create shared function for setting channels ASoC and HDA will use the same register to set channels for the device Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20211217115708.882525-6-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l41.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index ad2e32a12b8c..39d150f61382 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -764,5 +764,8 @@ extern struct regmap_config cs35l41_regmap_spi; int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap); int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsigned int reg_revid); +int cs35l41_set_channels(struct device *dev, struct regmap *reg, + unsigned int tx_num, unsigned int *tx_slot, + unsigned int rx_num, unsigned int *rx_slot); #endif /* __CS35L41_H */ -- cgit v1.2.3 From e8e4fcc047c6e0c5411faeb8cc29aed2e5036a00 Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 17 Dec 2021 11:57:04 +0000 Subject: ASoC: cs35l41: Create shared function for boost configuration ASoC and HDA will use the same registers to configure internal boost for the device Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20211217115708.882525-7-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l41.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index 39d150f61382..29a527457b48 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -767,5 +767,7 @@ int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsign int cs35l41_set_channels(struct device *dev, struct regmap *reg, unsigned int tx_num, unsigned int *tx_slot, unsigned int rx_num, unsigned int *rx_slot); +int cs35l41_boost_config(struct device *dev, struct regmap *regmap, int boost_ind, int boost_cap, + int boost_ipk); #endif /* __CS35L41_H */ -- cgit v1.2.3 From 9a45ac2320d0a6ae01880a30d4b86025fce4061b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Dec 2021 22:41:18 -0500 Subject: fbdev: fbmem: add a helper to determine if an aperture is used by a fw fb Add a function for drivers to check if the a firmware initialized fb is corresponds to their aperture. This allows drivers to check if the device corresponds to what the firmware set up as the display device. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=215203 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1840 Signed-off-by: Alex Deucher --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 6f3db99ab990..3da95842b207 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -610,6 +610,7 @@ extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const char *name); extern int remove_conflicting_framebuffers(struct apertures_struct *a, const char *name, bool primary); +extern bool is_firmware_framebuffer(struct apertures_struct *a); extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); extern int fb_show_logo(struct fb_info *fb_info, int rotate); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); -- cgit v1.2.3 From 1b4e3f26f9f7553b260b8aed43967500961448a6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 2 Dec 2021 15:06:14 +0000 Subject: mm: vmscan: Reduce throttling due to a failure to make progress Mike Galbraith, Alexey Avramov and Darrick Wong all reported similar problems due to reclaim throttling for excessive lengths of time. In Alexey's case, a memory hog that should go OOM quickly stalls for several minutes before stalling. In Mike and Darrick's cases, a small memcg environment stalled excessively even though the system had enough memory overall. Commit 69392a403f49 ("mm/vmscan: throttle reclaim when no progress is being made") introduced the problem although commit a19594ca4a8b ("mm/vmscan: increase the timeout if page reclaim is not making progress") made it worse. Systems at or near an OOM state that cannot be recovered must reach OOM quickly and memcg should kill tasks if a memcg is near OOM. To address this, only stall for the first zone in the zonelist, reduce the timeout to 1 tick for VMSCAN_THROTTLE_NOPROGRESS and only stall if the scan control nr_reclaimed is 0, kswapd is still active and there were excessive pages pending for writeback. If kswapd has stopped reclaiming due to excessive failures, do not stall at all so that OOM triggers relatively quickly. Similarly, if an LRU is simply congested, only lightly throttle similar to NOPROGRESS. Alexey's original case was the most straight forward for i in {1..3}; do tail /dev/zero; done On vanilla 5.16-rc1, this test stalled heavily, after the patch the test completes in a few seconds similar to 5.15. Alexey's second test case added watching a youtube video while tail runs 10 times. On 5.15, playback only jitters slightly, 5.16-rc1 stalls a lot with lots of frames missing and numerous audio glitches. With this patch applies, the video plays similarly to 5.15. [lkp@intel.com: Fix W=1 build warning] Link: https://lore.kernel.org/r/99e779783d6c7fce96448a3402061b9dc1b3b602.camel@gmx.de Link: https://lore.kernel.org/r/20211124011954.7cab9bb4@mail.inbox.lv Link: https://lore.kernel.org/r/20211022144651.19914-1-mgorman@techsingularity.net Link: https://lore.kernel.org/r/20211202150614.22440-1-mgorman@techsingularity.net Link: https://linux-regtracking.leemhuis.info/regzbot/regression/20211124011954.7cab9bb4@mail.inbox.lv/ Reported-and-tested-by: Alexey Avramov Reported-and-tested-by: Mike Galbraith Reported-and-tested-by: Darrick J. Wong Reported-by: kernel test robot Acked-by: Hugh Dickins Tracked-by: Thorsten Leemhuis Fixes: 69392a403f49 ("mm/vmscan: throttle reclaim when no progress is being made") Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + include/trace/events/vmscan.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 58e744b78c2c..936dc0b6c226 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -277,6 +277,7 @@ enum vmscan_throttle_state { VMSCAN_THROTTLE_WRITEBACK, VMSCAN_THROTTLE_ISOLATED, VMSCAN_THROTTLE_NOPROGRESS, + VMSCAN_THROTTLE_CONGESTED, NR_VMSCAN_THROTTLE, }; diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index f25a6149d3ba..ca2e9009a651 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -30,12 +30,14 @@ #define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK) #define _VMSCAN_THROTTLE_ISOLATED (1 << VMSCAN_THROTTLE_ISOLATED) #define _VMSCAN_THROTTLE_NOPROGRESS (1 << VMSCAN_THROTTLE_NOPROGRESS) +#define _VMSCAN_THROTTLE_CONGESTED (1 << VMSCAN_THROTTLE_CONGESTED) #define show_throttle_flags(flags) \ (flags) ? __print_flags(flags, "|", \ {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"}, \ {_VMSCAN_THROTTLE_ISOLATED, "VMSCAN_THROTTLE_ISOLATED"}, \ - {_VMSCAN_THROTTLE_NOPROGRESS, "VMSCAN_THROTTLE_NOPROGRESS"} \ + {_VMSCAN_THROTTLE_NOPROGRESS, "VMSCAN_THROTTLE_NOPROGRESS"}, \ + {_VMSCAN_THROTTLE_CONGESTED, "VMSCAN_THROTTLE_CONGESTED"} \ ) : "VMSCAN_THROTTLE_NONE" -- cgit v1.2.3 From 79d39fc503b43b566feae5bc9a57dfcffdf41bd1 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Tue, 28 Dec 2021 21:06:10 +0800 Subject: net/smc: Add netlink net namespace support This adds net namespace ID to diag of linkgroup, helps us to distinguish different namespaces, and net_cookie is unique in the whole system. Signed-off-by: Tony Lu Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 2 ++ include/uapi/linux/smc_diag.h | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 20f33b27787f..6c2874fd2c00 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -119,6 +119,8 @@ enum { SMC_NLA_LGR_R_CONNS_NUM, /* u32 */ SMC_NLA_LGR_R_V2_COMMON, /* nest */ SMC_NLA_LGR_R_V2, /* nest */ + SMC_NLA_LGR_R_NET_COOKIE, /* u64 */ + SMC_NLA_LGR_R_PAD, /* flag */ __SMC_NLA_LGR_R_MAX, SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1 }; diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 8cb3a6fef553..c7008d87f1a4 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -84,11 +84,12 @@ struct smc_diag_conninfo { /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { - __u8 link_id; /* link identifier */ - __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ - __u8 ibport; /* RDMA device port number */ - __u8 gid[40]; /* local GID */ - __u8 peer_gid[40]; /* peer GID */ + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ + __aligned_u64 net_cookie; /* RDMA device net namespace */ }; struct smc_diag_lgrinfo { -- cgit v1.2.3 From f9d31c4cf4c11ff10317f038b9c6f7c3bda6cdd4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 31 Dec 2021 18:37:37 -0500 Subject: sctp: hold endpoint before calling cb in sctp_transport_lookup_process The same fix in commit 5ec7d18d1813 ("sctp: use call_rcu to free endpoint") is also needed for dumping one asoc and sock after the lookup. Fixes: 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d314a180ab93..3ae61ce2eabd 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -112,8 +112,7 @@ struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos); -int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), - struct net *net, +int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, -- cgit v1.2.3 From e7026f15564fbe0c8b091f218203111f77b84eda Mon Sep 17 00:00:00 2001 From: Colin Foster Date: Tue, 28 Dec 2021 21:03:06 -0800 Subject: net: phy: lynx: refactor Lynx PCS module to use generic phylink_pcs Remove references to lynx_pcs structures so drivers like the Felix DSA can reference alternate PCS drivers. Signed-off-by: Colin Foster Signed-off-by: David S. Miller --- include/linux/pcs-lynx.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/pcs-lynx.h b/include/linux/pcs-lynx.h index a6440d6ebe95..5712cc2ce775 100644 --- a/include/linux/pcs-lynx.h +++ b/include/linux/pcs-lynx.h @@ -9,13 +9,10 @@ #include #include -struct lynx_pcs { - struct phylink_pcs pcs; - struct mdio_device *mdio; -}; +struct mdio_device *lynx_get_mdio_device(struct phylink_pcs *pcs); -struct lynx_pcs *lynx_pcs_create(struct mdio_device *mdio); +struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio); -void lynx_pcs_destroy(struct lynx_pcs *pcs); +void lynx_pcs_destroy(struct phylink_pcs *pcs); #endif /* __LINUX_PCS_LYNX_H */ -- cgit v1.2.3 From ece014141cd4b49f2d763f28b19e417b84460560 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 May 2021 07:29:47 -0400 Subject: mm/doc: Add documentation for folio_test_uptodate Move the PG_uptodate documentation to be documentation for folio_test_uptodate() and expand on it a little. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/page-flags.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b5f14d581113..b3d353d537e2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -68,9 +68,6 @@ * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as * a result of MADV_FREE). * - * PG_uptodate tells whether the page's contents is valid. When a read - * completes, the page becomes uptodate, unless a disk I/O error happened. - * * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * @@ -615,6 +612,16 @@ TESTPAGEFLAG_FALSE(Ksm, ksm) u64 stable_page_flags(struct page *page); +/** + * folio_test_uptodate - Is this folio up to date? + * @folio: The folio. + * + * The uptodate flag is set on a folio when every byte in the folio is + * at least as new as the corresponding bytes on storage. Anonymous + * and CoW folios are always uptodate. If the folio is not uptodate, + * some of the bytes in it may be; see the is_partially_uptodate() + * address_space operation. + */ static inline bool folio_test_uptodate(struct folio *folio) { bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); -- cgit v1.2.3 From 10331795fb7991a39ebd0330fdb074cbd81fef48 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Dec 2021 15:24:51 -0500 Subject: pagevec: Add folio_batch The folio_batch is the same as the pagevec, except that it is typed to contain folios and not pages. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/pagevec.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'include') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 7f3f19065a9f..c3fa616d7ae7 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -15,8 +15,10 @@ #define PAGEVEC_SIZE 15 struct page; +struct folio; struct address_space; +/* Layout must match folio_batch */ struct pagevec { unsigned char nr; bool percpu_pvec_drained; @@ -81,4 +83,71 @@ static inline void pagevec_release(struct pagevec *pvec) __pagevec_release(pvec); } +/** + * struct folio_batch - A collection of folios. + * + * The folio_batch is used to amortise the cost of retrieving and + * operating on a set of folios. The order of folios in the batch may be + * significant (eg delete_from_page_cache_batch()). Some users of the + * folio_batch store "exceptional" entries in it which can be removed + * by calling folio_batch_remove_exceptionals(). + */ +struct folio_batch { + unsigned char nr; + bool percpu_pvec_drained; + struct folio *folios[PAGEVEC_SIZE]; +}; + +/* Layout must match pagevec */ +static_assert(sizeof(struct pagevec) == sizeof(struct folio_batch)); +static_assert(offsetof(struct pagevec, pages) == + offsetof(struct folio_batch, folios)); + +/** + * folio_batch_init() - Initialise a batch of folios + * @fbatch: The folio batch. + * + * A freshly initialised folio_batch contains zero folios. + */ +static inline void folio_batch_init(struct folio_batch *fbatch) +{ + fbatch->nr = 0; +} + +static inline unsigned int folio_batch_count(struct folio_batch *fbatch) +{ + return fbatch->nr; +} + +static inline unsigned int fbatch_space(struct folio_batch *fbatch) +{ + return PAGEVEC_SIZE - fbatch->nr; +} + +/** + * folio_batch_add() - Add a folio to a batch. + * @fbatch: The folio batch. + * @folio: The folio to add. + * + * The folio is added to the end of the batch. + * The batch must have previously been initialised using folio_batch_init(). + * + * Return: The number of slots still available. + */ +static inline unsigned folio_batch_add(struct folio_batch *fbatch, + struct folio *folio) +{ + fbatch->folios[fbatch->nr++] = folio; + return fbatch_space(fbatch); +} + +static inline void folio_batch_release(struct folio_batch *fbatch) +{ + pagevec_release((struct pagevec *)fbatch); +} + +static inline void folio_batch_remove_exceptionals(struct folio_batch *fbatch) +{ + pagevec_remove_exceptionals((struct pagevec *)fbatch); +} #endif /* _LINUX_PAGEVEC_H */ -- cgit v1.2.3 From e66d70c034dbdfe1a48863f0865ac86aaf2fef1a Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 13 Dec 2021 10:51:41 +0530 Subject: dmaengine: xilinx_dpdma: use correct SDPX tag for header file Commit 188c310bdd5d ("dmaengine: xilinx_dpdma: stop using slave_id field") add the header file with incorrect format for SPDX tag, fix that WARNING: Improper SPDX comment style for 'include/linux/dma/xilinx_dpdma.h', please use '/*' instead #1: FILE: include/linux/dma/xilinx_dpdma.h:1: +// SPDX-License-Identifier: GPL-2.0 WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 #1: FILE: include/linux/dma/xilinx_dpdma.h:1: +// SPDX-License-Identifier: GPL-2.0 Fixes: 188c310bdd5d ("dmaengine: xilinx_dpdma: stop using slave_id field") Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20211213052141.850807-1-vkoul@kernel.org Signed-off-by: Vinod Koul --- include/linux/dma/xilinx_dpdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dma/xilinx_dpdma.h b/include/linux/dma/xilinx_dpdma.h index 83a1377f03f8..02a4adf8921b 100644 --- a/include/linux/dma/xilinx_dpdma.h +++ b/include/linux/dma/xilinx_dpdma.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_DMA_XILINX_DPDMA_H #define __LINUX_DMA_XILINX_DPDMA_H -- cgit v1.2.3 From 04ce4a6b9b7b84eb6be7b544d3d0e748b6837764 Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Sun, 2 Jan 2022 23:38:12 +0100 Subject: dt-bindings: ti-serdes-mux: Add defines for J721S2 SoC There are 4 lanes in the single instance of J721S2 SERDES. Each SERDES lane mux can select upto 4 different IPs. Define all the possible functions. Signed-off-by: Aswath Govindraju Acked-by: Rob Herring Signed-off-by: Peter Rosin Link: https://lore.kernel.org/r/0571fd6b-ec4d-71b3-5cf7-6fa48ed5592c@axentia.se Signed-off-by: Greg Kroah-Hartman --- include/dt-bindings/mux/ti-serdes.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/mux/ti-serdes.h b/include/dt-bindings/mux/ti-serdes.h index d417b9268b16..d3116c52ab72 100644 --- a/include/dt-bindings/mux/ti-serdes.h +++ b/include/dt-bindings/mux/ti-serdes.h @@ -95,4 +95,26 @@ #define AM64_SERDES0_LANE0_PCIE0 0x0 #define AM64_SERDES0_LANE0_USB 0x1 +/* J721S2 */ + +#define J721S2_SERDES0_LANE0_EDP_LANE0 0x0 +#define J721S2_SERDES0_LANE0_PCIE1_LANE0 0x1 +#define J721S2_SERDES0_LANE0_IP3_UNUSED 0x2 +#define J721S2_SERDES0_LANE0_IP4_UNUSED 0x3 + +#define J721S2_SERDES0_LANE1_EDP_LANE1 0x0 +#define J721S2_SERDES0_LANE1_PCIE1_LANE1 0x1 +#define J721S2_SERDES0_LANE1_USB 0x2 +#define J721S2_SERDES0_LANE1_IP4_UNUSED 0x3 + +#define J721S2_SERDES0_LANE2_EDP_LANE2 0x0 +#define J721S2_SERDES0_LANE2_PCIE1_LANE2 0x1 +#define J721S2_SERDES0_LANE2_IP3_UNUSED 0x2 +#define J721S2_SERDES0_LANE2_IP4_UNUSED 0x3 + +#define J721S2_SERDES0_LANE3_EDP_LANE3 0x0 +#define J721S2_SERDES0_LANE3_PCIE1_LANE3 0x1 +#define J721S2_SERDES0_LANE3_USB 0x2 +#define J721S2_SERDES0_LANE3_IP4_UNUSED 0x3 + #endif /* _DT_BINDINGS_MUX_TI_SERDES */ -- cgit v1.2.3 From ccae4a19c9140a34a0c5f0658812496dd8bbdeaf Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 25 Oct 2021 17:31:54 +0100 Subject: btrfs: remove no longer needed logic for replaying directory deletes Now that we log only dir index keys when logging a directory, we no longer need to deal with dir item keys in the log replay code for replaying directory deletes. This is also true for the case when we replay a log tree created by a kernel that still logs dir items. So remove the remaining code of the replay of directory deletes algorithm that deals with dir item keys. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index e1c4c732aaba..5416f1f1a77a 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -146,7 +146,9 @@ /* * dir items are the name -> inode pointers in a directory. There is one - * for every name in a directory. + * for every name in a directory. BTRFS_DIR_LOG_ITEM_KEY is no longer used + * but it's still defined here for documentation purposes and to help avoid + * having its numerical value reused in the future. */ #define BTRFS_DIR_LOG_ITEM_KEY 60 #define BTRFS_DIR_LOG_INDEX_KEY 72 -- cgit v1.2.3 From 2e4e97abac4c95f8b87b2912ea013f7836a6f10b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 5 Nov 2021 16:45:29 -0400 Subject: btrfs: pass fs_info to trace_btrfs_transaction_commit The root on the trans->root can be anything, and generally we're committing from the transaction kthread so it's usually the tree_root. Change this to just take an fs_info, and to maintain compatibility simply put the ROOT_TREE_OBJECTID as the root objectid for the tracepoint. This will allow use to remove trans->root. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 8f58fd95efc7..0d729664b4b4 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -182,18 +182,18 @@ FLUSH_STATES TRACE_EVENT(btrfs_transaction_commit, - TP_PROTO(const struct btrfs_root *root), + TP_PROTO(const struct btrfs_fs_info *fs_info), - TP_ARGS(root), + TP_ARGS(fs_info), TP_STRUCT__entry_btrfs( __field( u64, generation ) __field( u64, root_objectid ) ), - TP_fast_assign_btrfs(root->fs_info, - __entry->generation = root->fs_info->generation; - __entry->root_objectid = root->root_key.objectid; + TP_fast_assign_btrfs(fs_info, + __entry->generation = fs_info->generation; + __entry->root_objectid = BTRFS_ROOT_TREE_OBJECTID; ), TP_printk_btrfs("root=%llu(%s) gen=%llu", -- cgit v1.2.3 From 25fd330370ac40653671f323acc7fb6db27ef6fe Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 15 Dec 2021 02:01:18 +0100 Subject: power: supply_core: Pass pointer to battery info The function to retrieve battery info (from the device tree) assumes we have a static info struct that gets populated by calling into power_supply_get_battery_info(). This is awkward since I want to support tables of static battery info by just assigning a pointer to all info based on e.g. a compatible value in the device tree. We also have a mixture of static and dynamically allocated variables here. Bite the bullet and let power_supply_get_battery_info() allocate also the memory used for the very top level struct power_supply_battery_info container. Pass pointers around and lifecycle this with the psy device just like the stuff we allocate inside it. Change all current users over. As part of the change, initializers need to be added to some previously uninitialized fields in struct power_supply_battery_info. Reviewed-By: Matti Vaittinen Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f6e94eae4f28..86b4d5c4dab9 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -575,7 +575,7 @@ devm_power_supply_get_by_phandle(struct device *dev, const char *property) #endif /* CONFIG_OF */ extern int power_supply_get_battery_info(struct power_supply *psy, - struct power_supply_battery_info *info); + struct power_supply_battery_info **info_out); extern void power_supply_put_battery_info(struct power_supply *psy, struct power_supply_battery_info *info); extern int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, -- cgit v1.2.3 From d5dbcca70182501bed99de85c224cef04c38ed92 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 Jan 2022 17:24:08 +0100 Subject: pktcdvd: convert to use attribute groups There is no need to create kobject children of the pktcdvd device just to display a subdirectory name. Instead, use a named attribute group which removes the extra kobjects and also fixes the userspace race where the device is created yet tools like libudev can not see the attributes as they think the subdirectories are some other sort of device. Cc: linux-block@vger.kernel.org Cc: Jens Axboe Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20220103162408.742003-1-gregkh@linuxfoundation.org Signed-off-by: Jens Axboe --- include/linux/pktcdvd.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index c391e694aa26..f9c5ac80d59b 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -152,14 +152,6 @@ struct packet_stacked_data }; #define PSD_POOL_SIZE 64 -struct pktcdvd_kobj -{ - struct kobject kobj; - struct pktcdvd_device *pd; -}; -#define to_pktcdvdkobj(_k) \ - ((struct pktcdvd_kobj*)container_of(_k,struct pktcdvd_kobj,kobj)) - struct pktcdvd_device { struct block_device *bdev; /* dev attached */ @@ -197,8 +189,6 @@ struct pktcdvd_device int write_congestion_on; struct device *dev; /* sysfs pktcdvd[0-7] dev */ - struct pktcdvd_kobj *kobj_stat; /* sysfs pktcdvd[0-7]/stat/ */ - struct pktcdvd_kobj *kobj_wqueue; /* sysfs pktcdvd[0-7]/write_queue/ */ struct dentry *dfs_d_root; /* debugfs: devname directory */ struct dentry *dfs_f_info; /* debugfs: info file */ -- cgit v1.2.3 From 6c952a0dc9c3ced98c4c8aa7cd11c25c59157f1f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:26 +0100 Subject: ata: libata: Add ata_port_classify() helper Add an ata_port_classify() helper to print out the results from the device classification and remove the debugging statements from ata_dev_classify(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 2a8404b26083..235fdbeb19ea 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1160,6 +1160,8 @@ extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, unsigned int n_elem); extern unsigned int ata_dev_classify(const struct ata_taskfile *tf); +extern unsigned int ata_port_classify(struct ata_port *ap, + const struct ata_taskfile *tf); extern void ata_dev_disable(struct ata_device *adev); extern void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); -- cgit v1.2.3 From f8ec26d0f5bcdb864f771fb6d250d9ed3165eb61 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:29 +0100 Subject: ata: libata: add reset tracepoints To follow the flow of control we should be using tracepoints, as they will tie in with the actual I/O flow and deliver a better overview about what it happening. This patch adds tracepoints for hard reset, soft reset, and postreset and adds them in the libata-eh control flow. With that we can drop the reset DPRINTK calls in the various drivers. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/trace/events/libata.h | 96 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) (limited to 'include') diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index ab69434e2329..ec2a350d1aca 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -132,6 +132,22 @@ ata_protocol_name(ATAPI_PROT_PIO), \ ata_protocol_name(ATAPI_PROT_DMA)) +#define ata_class_name(class) { class, #class } +#define show_class_name(val) \ + __print_symbolic(val, \ + ata_class_name(ATA_DEV_UNKNOWN), \ + ata_class_name(ATA_DEV_ATA), \ + ata_class_name(ATA_DEV_ATA_UNSUP), \ + ata_class_name(ATA_DEV_ATAPI), \ + ata_class_name(ATA_DEV_ATAPI_UNSUP), \ + ata_class_name(ATA_DEV_PMP), \ + ata_class_name(ATA_DEV_PMP_UNSUP), \ + ata_class_name(ATA_DEV_SEMB), \ + ata_class_name(ATA_DEV_SEMB_UNSUP), \ + ata_class_name(ATA_DEV_ZAC), \ + ata_class_name(ATA_DEV_ZAC_UNSUP), \ + ata_class_name(ATA_DEV_NONE)) + const char *libata_trace_parse_status(struct trace_seq*, unsigned char); #define __parse_status(s) libata_trace_parse_status(p, s) @@ -329,6 +345,86 @@ TRACE_EVENT(ata_eh_link_autopsy_qc, __parse_eh_err_mask(__entry->eh_err_mask)) ); +DECLARE_EVENT_CLASS(ata_link_reset_begin_template, + + TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), + + TP_ARGS(link, class, deadline), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __array( unsigned int, class, 2 ) + __field( unsigned long, deadline ) + ), + + TP_fast_assign( + __entry->ata_port = link->ap->print_id; + memcpy(__entry->class, class, 2); + __entry->deadline = deadline; + ), + + TP_printk("ata_port=%u deadline=%lu classes=[%s,%s]", + __entry->ata_port, __entry->deadline, + show_class_name(__entry->class[0]), + show_class_name(__entry->class[1])) +); + +DEFINE_EVENT(ata_link_reset_begin_template, ata_link_hardreset_begin, + TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), + TP_ARGS(link, class, deadline)); + +DEFINE_EVENT(ata_link_reset_begin_template, ata_slave_hardreset_begin, + TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), + TP_ARGS(link, class, deadline)); + +DEFINE_EVENT(ata_link_reset_begin_template, ata_link_softreset_begin, + TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), + TP_ARGS(link, class, deadline)); + +DECLARE_EVENT_CLASS(ata_link_reset_end_template, + + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + + TP_ARGS(link, class, rc), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __array( unsigned int, class, 2 ) + __field( int, rc ) + ), + + TP_fast_assign( + __entry->ata_port = link->ap->print_id; + memcpy(__entry->class, class, 2); + __entry->rc = rc; + ), + + TP_printk("ata_port=%u rc=%d class=[%s,%s]", + __entry->ata_port, __entry->rc, + show_class_name(__entry->class[0]), + show_class_name(__entry->class[1])) +); + +DEFINE_EVENT(ata_link_reset_end_template, ata_link_hardreset_end, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + +DEFINE_EVENT(ata_link_reset_end_template, ata_slave_hardreset_end, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + +DEFINE_EVENT(ata_link_reset_end_template, ata_link_softreset_end, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + +DEFINE_EVENT(ata_link_reset_end_template, ata_link_postreset, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + +DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset, + TP_PROTO(struct ata_link *link, unsigned int *class, int rc), + TP_ARGS(link, class, rc)); + #endif /* _TRACE_LIBATA_H */ /* This part must be outside protection */ -- cgit v1.2.3 From fc914faad67f237528739ec717222a560c97e723 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:30 +0100 Subject: ata: libata: add qc_prep tracepoint Convert the existing ata_qc_issue() tracepoint into a template, and add tracepoints for ata_qc_prep() and ata_qc_issue() based on that template. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/trace/events/libata.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index ec2a350d1aca..2394fc2b2831 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -164,7 +164,7 @@ const char *libata_trace_parse_subcmd(struct trace_seq *, unsigned char, unsigned char, unsigned char); #define __parse_subcmd(c,f,h) libata_trace_parse_subcmd(p, c, f, h) -TRACE_EVENT(ata_qc_issue, +DECLARE_EVENT_CLASS(ata_qc_issue_template, TP_PROTO(struct ata_queued_cmd *qc), @@ -223,6 +223,14 @@ TRACE_EVENT(ata_qc_issue, __entry->dev) ); +DEFINE_EVENT(ata_qc_issue_template, ata_qc_prep, + TP_PROTO(struct ata_queued_cmd *qc), + TP_ARGS(qc)); + +DEFINE_EVENT(ata_qc_issue_template, ata_qc_issue, + TP_PROTO(struct ata_queued_cmd *qc), + TP_ARGS(qc)); + DECLARE_EVENT_CLASS(ata_qc_complete_template, TP_PROTO(struct ata_queued_cmd *qc), -- cgit v1.2.3 From 9795ded7f924b6486e54976619b1b094fcc1969d Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 3 Jan 2022 13:44:50 +0200 Subject: net/sched: act_ct: Fill offloading tuple iifidx Driver offloading ct tuples can use the information of which devices received the packets that created the offloaded connections, to more efficiently offload them only to the relevant device. Add new act_ct nf conntrack extension, which is used to store the skb devices before offloading the connection, and then fill in the tuple iifindex so drivers can get the device via metadata dissector match. Signed-off-by: Oz Shlomo Signed-off-by: Paul Blakey Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_act_ct.h | 50 +++++++++++++++++++++++++++++ include/net/netfilter/nf_conntrack_extend.h | 4 +++ 2 files changed, 54 insertions(+) create mode 100644 include/net/netfilter/nf_conntrack_act_ct.h (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h new file mode 100644 index 000000000000..078d3c52c03f --- /dev/null +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_ACT_CT_H +#define _NF_CONNTRACK_ACT_CT_H + +#include +#include +#include + +struct nf_conn_act_ct_ext { + int ifindex[IP_CT_DIR_MAX]; +}; + +static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + return nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); +#else + return NULL; +#endif +} + +static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); + + if (act_ct) + return act_ct; + + act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + return act_ct; +#else + return NULL; +#endif +} + +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +#endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index e1e588387103..c7515d82ab06 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -27,6 +27,9 @@ enum nf_ct_ext_id { #endif #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) NF_CT_EXT_SYNPROXY, +#endif +#if IS_ENABLED(CONFIG_NET_ACT_CT) + NF_CT_EXT_ACT_CT, #endif NF_CT_EXT_NUM, }; @@ -40,6 +43,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy +#define NF_CT_EXT_ACT_CT_TYPE struct nf_conn_act_ct_ext /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { -- cgit v1.2.3 From fa55a7d745de2d10489295b0674a403e2a5d490d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 3 Jan 2022 18:11:30 +0100 Subject: seg6: export get_srh() for ICMP handling An ICMP error message can contain in its message body part of an IPv6 packet which invoked the error. Such a packet might contain a segment router header. Export get_srh() so the ICMP code can make use of it. Since his changes the scope of the function from local to global, add the seg6_ prefix to keep the namespace clean. And move it into seg6.c so it is always available, not just when IPV6_SEG6_LWTUNNEL is enabled. Signed-off-by: Andrew Lunn Reviewed-by: David Ahern Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/seg6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/seg6.h b/include/net/seg6.h index 9d19c15e8545..a6f25983670a 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -58,6 +58,7 @@ extern int seg6_local_init(void); extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); +extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); -- cgit v1.2.3 From e41294408c56c68ea0f269d757527bf33b39118a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 3 Jan 2022 18:11:31 +0100 Subject: icmp: ICMPV6: Examine invoking packet for Segment Route Headers. RFC8754 says: ICMP error packets generated within the SR domain are sent to source nodes within the SR domain. The invoking packet in the ICMP error message may contain an SRH. Since the destination address of a packet with an SRH changes as each segment is processed, it may not be the destination used by the socket or application that generated the invoking packet. For the source of an invoking packet to process the ICMP error message, the ultimate destination address of the IPv6 header may be required. The following logic is used to determine the destination address for use by protocol-error handlers. * Walk all extension headers of the invoking IPv6 packet to the routing extension header preceding the upper-layer header. - If routing header is type 4 Segment Routing Header (SRH) o The SID at Segment List[0] may be used as the destination address of the invoking packet. Mangle the skb so the network header points to the invoking packet inside the ICMP packet. The seg6 helpers can then be used on the skb to find any segment routing headers. If found, mark this fact in the IPv6 control block of the skb, and store the offset into the packet of the SRH. Then restore the skb back to its old state. Signed-off-by: Andrew Lunn Reviewed-by: David Ahern Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 ++ include/net/seg6.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 20c1f968da7c..a59d25f19385 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -133,6 +133,7 @@ struct inet6_skb_parm { __u16 dsthao; #endif __u16 frag_max_size; + __u16 srhoff; #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 @@ -142,6 +143,7 @@ struct inet6_skb_parm { #define IP6SKB_HOPBYHOP 32 #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 +#define IP6SKB_SEG6 256 }; #if defined(CONFIG_NET_L3_MASTER_DEV) diff --git a/include/net/seg6.h b/include/net/seg6.h index a6f25983670a..02b0cd305787 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -59,6 +59,7 @@ extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); +extern void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt); extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); -- cgit v1.2.3 From 222a011efc839ca1f51bf89fe7a2b3705fa55ccd Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 3 Jan 2022 18:11:32 +0100 Subject: udp6: Use Segment Routing Header for dest address if present When finding the socket to report an error on, if the invoking packet is using Segment Routing, the IPv6 destination address is that of an intermediate router, not the end destination. Extract the ultimate destination address from the segment address. This change allows traceroute to function in the presence of Segment Routing. Signed-off-by: Andrew Lunn Reviewed-by: David Ahern Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/seg6.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/seg6.h b/include/net/seg6.h index 02b0cd305787..af668f17b398 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -65,4 +65,23 @@ extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id); + +/* If the packet which invoked an ICMP error contains an SRH return + * the true destination address from within the SRH, otherwise use the + * destination address in the IP header. + */ +static inline const struct in6_addr *seg6_get_daddr(struct sk_buff *skb, + struct inet6_skb_parm *opt) +{ + struct ipv6_sr_hdr *srh; + + if (opt->flags & IP6SKB_SEG6) { + srh = (struct ipv6_sr_hdr *)(skb->data + opt->srhoff); + return &srh->segments[0]; + } + + return NULL; +} + + #endif -- cgit v1.2.3 From 1de6b15a434c0068253fea5d719f71143e7e3a79 Mon Sep 17 00:00:00 2001 From: xu xin Date: Tue, 4 Jan 2022 10:59:34 +0000 Subject: Namespaceify min_pmtu sysctl This patch enables the sysctl min_pmtu to be configured per net namespace. Signed-off-by: xu xin Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6c5b2efc4f17..1ecbf82b07f1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -85,6 +85,8 @@ struct netns_ipv4 { int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; + u32 ip_rt_min_pmtu; + struct local_ports ip_local_ports; u8 sysctl_tcp_ecn; -- cgit v1.2.3 From 1135fad204805518462c1f0caaca6bcd52ba78cf Mon Sep 17 00:00:00 2001 From: xu xin Date: Tue, 4 Jan 2022 10:59:47 +0000 Subject: Namespaceify mtu_expires sysctl This patch enables the sysctl mtu_expires to be configured per net namespace. Signed-off-by: xu xin Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1ecbf82b07f1..78557643526e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -86,6 +86,7 @@ struct netns_ipv4 { int sysctl_icmp_ratemask; u32 ip_rt_min_pmtu; + int ip_rt_mtu_expires; struct local_ports ip_local_ports; -- cgit v1.2.3 From f083266487690124481eac0869da850406fb3ed3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 29 Aug 2021 09:18:53 +0200 Subject: headers/uninline: Uninline single-use function: kobject_has_children() This was the only usage of in , so we'll able to decouple the two after this change. Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index ad90b49824dc..c7b47399b36a 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -117,23 +117,6 @@ extern void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid); extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); -/** - * kobject_has_children - Returns whether a kobject has children. - * @kobj: the object to test - * - * This will return whether a kobject has other kobjects as children. - * - * It does NOT account for the presence of attribute files, only sub - * directories. It also assumes there is no concurrent addition or - * removal of such children, and thus relies on external locking. - */ -static inline bool kobject_has_children(struct kobject *kobj) -{ - WARN_ON_ONCE(kref_read(&kobj->kref) == 0); - - return kobj->sd && kobj->sd->dir.subdirs; -} - struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; -- cgit v1.2.3 From 57a18322227134e37b693ef8ef216ed7ce7ba7d6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 30 Dec 2021 20:31:19 +0100 Subject: ACPI / x86: Introduce an acpi_quirk_skip_acpi_ac_and_battery() helper Some x86 ACPI boards have broken AC and battery ACPI devices in their ACPI tables. This is often tied to these devices using certain PMICs where the factory OS image seems to be using native charger and fuel-gauge drivers instead. So far both the AC and battery drivers have almost identical checks for these PMICs including both of them having a DMI based mechanism to force usage of the ACPI AC and battery drivers on some boards even though one of these PMICs is present, with the same 2 boards listed in both driver's DMI tables for this. The only difference is that the AC driver checks for 2 PMICs and the battery driver only for one. This has grown this way because the other (Whiskey Cove) PMIC is only used on a few boards (3 known boards) and although some of these do have non working ACPI battery devices, their _STA method always returns 0, but that really should not be relied on. This patch factors out the shared checks into a new acpi_quirk_skip_acpi_ac_and_battery() helper and moves the AC and battery drivers over to this new helper. Note the DMI table is shared with acpi_quirk_skip_i2c_client_enumeration() and acpi_quirk_skip_serdev_enumeration(), because boards needing DMI quirks for either of these typically also have broken AC and battery ACPI devices. The ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY quirk is not set yet on boards already in this DMI table, to avoid introducing any functional changes in this refactoring patch. Besided sharing the code between the AC and battery drivers this refactoring also moves this quirk handling to under #ifdef CONFIG_X86, removing this x86 specific code from non x86 ACPI builds. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 80c425bc98a7..04b9ace9689b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -615,12 +615,17 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev); #ifdef CONFIG_X86 bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status); +bool acpi_quirk_skip_acpi_ac_and_battery(void); #else static inline bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status) { return false; } +static inline bool acpi_quirk_skip_acpi_ac_and_battery(void) +{ + return false; +} #endif #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS) -- cgit v1.2.3 From acb99b9b2a08f439a13eafffd2a59b798d983462 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Tue, 28 Dec 2021 22:14:53 +0100 Subject: mac80211: Add stations iterator where the iterator function may sleep ieee80211_iterate_active_interfaces() and ieee80211_iterate_active_interfaces_atomic() already exist, where the former allows the iterator function to sleep. Add ieee80211_iterate_stations() which is similar to ieee80211_iterate_stations_atomic() but allows the iterator to sleep. This is needed for adding SDIO support to the rtw88 driver. Some interators there are reading or writing registers. With the SDIO ops (sdio_readb, sdio_writeb and friends) this means that the iterator function may sleep. Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20211228211501.468981-2-martin.blumenstingl@googlemail.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8907338d52b5..c50221d7e82c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5614,6 +5614,9 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * This function iterates over the interfaces associated with a given * hardware that are currently active and calls the callback for them. * This version can only be used while holding the wiphy mutex. + * The driver must not call this with a lock held that it can also take in + * response to callbacks from mac80211, and it must not call this within + * callbacks made by mac80211 - both would result in deadlocks. * * @hw: the hardware struct of which the interfaces should be iterated over * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags @@ -5627,6 +5630,24 @@ void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, struct ieee80211_vif *vif), void *data); +/** + * ieee80211_iterate_stations - iterate stations + * + * This function iterates over all stations associated with a given + * hardware that are currently uploaded to the driver and calls the callback + * function for them. + * This function allows the iterator function to sleep, when the iterator + * function is atomic @ieee80211_iterate_stations_atomic can be used. + * + * @hw: the hardware struct of which the interfaces should be iterated over + * @iterator: the iterator function to call, cannot sleep + * @data: first argument of the iterator function + */ +void ieee80211_iterate_stations(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data); + /** * ieee80211_iterate_stations_atomic - iterate stations * -- cgit v1.2.3 From 5bc03b28ec24670e67c453ecb2bfbb9053b86838 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 20 Dec 2021 11:51:47 +0100 Subject: nl80211: clarify comment for mesh PLINK_BLOCKED state When a mesh link is in blocked state, it is very useful to still allow auth requests from the peer to re-establish it. When a remote node is power cycled, the peer state can easily end up in blocked state if multiple auth attempts are performed. Since this can lead to several minutes of downtime, we should accept auth attempts of the peer after it has come back. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20211220105147.88625-1-nbd@nbd.name Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f1a9d6594df2..195a238a322e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5623,7 +5623,7 @@ enum nl80211_if_combination_attrs { * @NL80211_PLINK_ESTAB: mesh peer link is established * @NL80211_PLINK_HOLDING: mesh peer link is being closed or cancelled * @NL80211_PLINK_BLOCKED: all frames transmitted from this mesh - * plink are discarded + * plink are discarded, except for authentication frames * @NUM_NL80211_PLINK_STATES: number of peer link states * @MAX_NL80211_PLINK_STATES: highest numerical value of plink states */ -- cgit v1.2.3 From 6c9eeb5f4a9bb2b11a40fd0f15efde7bd33ee908 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Jan 2022 17:19:40 +0200 Subject: KVM: arm64: vgic: Replace kernel.h with the necessary inclusions arm_vgic.h does not require all the stuff that kernel.h provides. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220104151940.55399-1-andriy.shevchenko@linux.intel.com --- include/kvm/arm_vgic.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index e602d848fc1a..bb30a6803d9f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -5,9 +5,11 @@ #ifndef __KVM_ARM_VGIC_H #define __KVM_ARM_VGIC_H -#include +#include #include #include +#include +#include #include #include #include -- cgit v1.2.3 From d9c19d32d86fa54934b632c4314beb067bf98378 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 18 Oct 2021 10:39:06 -0400 Subject: iov_iter: Add copy_folio_to_iter() This wrapper around copy_page_to_iter() works because copy_page_to_iter() handles compound pages correctly. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/uio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 6350354f97e9..43321dbebba8 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -7,6 +7,7 @@ #include #include +#include #include struct page; @@ -146,6 +147,12 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); +static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, + size_t bytes, struct iov_iter *i) +{ + return copy_page_to_iter(&folio->page, offset, bytes, i); +} + static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { -- cgit v1.2.3 From 5bf34d7c7ffe773c3b3c1b6ebf39e0f34a2436ec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Nov 2021 14:24:43 -0500 Subject: mm: Add folio_test_pmd_mappable() Add a predicate to determine if the folio might be mapped by a PMD entry. If CONFIG_TRANSPARENT_HUGEPAGE is disabled, we know it can't be, even if it's large enough. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/huge_mm.h | 14 ++++++++++++++ include/linux/mm.h | 42 +++++++++++++++++++++--------------------- 2 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f280f33ff223..e4c18ba8d3bf 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -274,6 +274,15 @@ static inline int thp_nr_pages(struct page *page) return 1; } +/** + * folio_test_pmd_mappable - Can we map this folio with a PMD? + * @folio: The folio to test + */ +static inline bool folio_test_pmd_mappable(struct folio *folio) +{ + return folio_order(folio) >= HPAGE_PMD_ORDER; +} + struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap); struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, @@ -339,6 +348,11 @@ static inline int thp_nr_pages(struct page *page) return 1; } +static inline bool folio_test_pmd_mappable(struct folio *folio) +{ + return false; +} + static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { return false; diff --git a/include/linux/mm.h b/include/linux/mm.h index a7e4a9e7d807..72ca04f16711 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -714,6 +714,27 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; +static inline unsigned int compound_order(struct page *page) +{ + if (!PageHead(page)) + return 0; + return page[1].compound_order; +} + +/** + * folio_order - The allocation order of a folio. + * @folio: The folio. + * + * A folio is composed of 2^order pages. See get_order() for the definition + * of order. + * + * Return: The order of the folio. + */ +static inline unsigned int folio_order(struct folio *folio) +{ + return compound_order(&folio->page); +} + #include /* @@ -906,27 +927,6 @@ static inline void destroy_compound_page(struct page *page) compound_page_dtors[page[1].compound_dtor](page); } -static inline unsigned int compound_order(struct page *page) -{ - if (!PageHead(page)) - return 0; - return page[1].compound_order; -} - -/** - * folio_order - The allocation order of a folio. - * @folio: The folio. - * - * A folio is composed of 2^order pages. See get_order() for the definition - * of order. - * - * Return: The order of the folio. - */ -static inline unsigned int folio_order(struct folio *folio) -{ - return compound_order(&folio->page); -} - static inline bool hpage_pincount_available(struct page *page) { /* -- cgit v1.2.3 From 9f2b04a25a41b1f41b3cead4f56854a4192ec5b0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Aug 2021 23:36:31 -0400 Subject: filemap: Add folio_put_wait_locked() Convert all three callers of put_and_wait_on_page_locked() to folio_put_wait_locked(). This shrinks the kernel overall by 19 bytes. filemap_update_page() shrinks by 19 bytes while __migration_entry_wait() is unchanged. folio_put_wait_locked() is 14 bytes smaller than put_and_wait_on_page_locked(), but pmd_migration_entry_wait() grows by 14 bytes. It removes the assumption from pmd_migration_entry_wait() that pages cannot be larger than a PMD (which is true today, but may be interesting to explore in the future). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 605246452305..841f7ba62d7d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -868,7 +868,7 @@ static inline int wait_on_page_locked_killable(struct page *page) return folio_wait_locked_killable(page_folio(page)); } -int put_and_wait_on_page_locked(struct page *page, int state); +int folio_put_wait_locked(struct folio *folio, int state); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); -- cgit v1.2.3 From 621db4880d305bc37b343b1671e03b7eb5d61389 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 8 May 2021 20:04:05 -0400 Subject: filemap: Add filemap_unaccount_folio() Replace unaccount_page_cache_page() with filemap_unaccount_folio(). The bug handling path could be a bit more robust (eg taking into account the mapcounts of tail pages), but it's really never supposed to happen. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 841f7ba62d7d..077b6f378666 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -884,11 +884,6 @@ static inline void __set_page_dirty(struct page *page, } void folio_account_cleaned(struct folio *folio, struct address_space *mapping, struct bdi_writeback *wb); -static inline void account_page_cleaned(struct page *page, - struct address_space *mapping, struct bdi_writeback *wb) -{ - return folio_account_cleaned(page_folio(page), mapping, wb); -} void __folio_cancel_dirty(struct folio *folio); static inline void folio_cancel_dirty(struct folio *folio) { -- cgit v1.2.3 From a0580c6f9babaf4413c8a7e2ab21d68e31f4c754 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 23 Jul 2021 09:29:46 -0400 Subject: filemap: Convert tracing of page cache operations to folio Pass the folio instead of a page. The page was already implicitly a folio as it accessed page->mapping directly. Add the order of the folio to the tracepoint, as this is important information. Also drop printing the address of the struct page as the pfn provides better information than the struct page address. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/trace/events/filemap.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h index c47b63db124e..46c89c1e460c 100644 --- a/include/trace/events/filemap.h +++ b/include/trace/events/filemap.h @@ -15,43 +15,45 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, - TP_PROTO(struct page *page), + TP_PROTO(struct folio *folio), - TP_ARGS(page), + TP_ARGS(folio), TP_STRUCT__entry( __field(unsigned long, pfn) __field(unsigned long, i_ino) __field(unsigned long, index) __field(dev_t, s_dev) + __field(unsigned char, order) ), TP_fast_assign( - __entry->pfn = page_to_pfn(page); - __entry->i_ino = page->mapping->host->i_ino; - __entry->index = page->index; - if (page->mapping->host->i_sb) - __entry->s_dev = page->mapping->host->i_sb->s_dev; + __entry->pfn = folio_pfn(folio); + __entry->i_ino = folio->mapping->host->i_ino; + __entry->index = folio->index; + if (folio->mapping->host->i_sb) + __entry->s_dev = folio->mapping->host->i_sb->s_dev; else - __entry->s_dev = page->mapping->host->i_rdev; + __entry->s_dev = folio->mapping->host->i_rdev; + __entry->order = folio_order(folio); ), - TP_printk("dev %d:%d ino %lx page=%p pfn=0x%lx ofs=%lu", + TP_printk("dev %d:%d ino %lx pfn=0x%lx ofs=%lu order=%u", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, - pfn_to_page(__entry->pfn), __entry->pfn, - __entry->index << PAGE_SHIFT) + __entry->index << PAGE_SHIFT, + __entry->order) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache, - TP_PROTO(struct page *page), - TP_ARGS(page) + TP_PROTO(struct folio *folio), + TP_ARGS(folio) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, - TP_PROTO(struct page *page), - TP_ARGS(page) + TP_PROTO(struct folio *folio), + TP_ARGS(folio) ); TRACE_EVENT(filemap_set_wb_err, -- cgit v1.2.3 From 452e9e6992fe058a650c81d01a9982e3faf10278 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 9 May 2021 09:33:42 -0400 Subject: filemap: Add filemap_remove_folio and __filemap_remove_folio Reimplement __delete_from_page_cache() as a wrapper around __filemap_remove_folio() and delete_from_page_cache() as a wrapper around filemap_remove_folio(). Remove the EXPORT_SYMBOL as delete_from_page_cache() was not used by any in-tree modules. Convert page_cache_free_page() into filemap_free_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 077b6f378666..3f26b191ede3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -930,8 +930,13 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp); int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp); -extern void delete_from_page_cache(struct page *page); -extern void __delete_from_page_cache(struct page *page, void *shadow); +void filemap_remove_folio(struct folio *folio); +void delete_from_page_cache(struct page *page); +void __filemap_remove_folio(struct folio *folio, void *shadow); +static inline void __delete_from_page_cache(struct page *page, void *shadow) +{ + __filemap_remove_folio(page_folio(page), shadow); +} void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, struct pagevec *pvec); -- cgit v1.2.3 From bb2e98b613a3c76c904dfa82eb4b86773817598b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Nov 2021 16:14:50 -0500 Subject: filemap: Remove thp_contains() This function is now unused, so delete it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 3f26b191ede3..8c2cad7f0c36 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -512,15 +512,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } -/* Does this page contain this index? */ -static inline bool thp_contains(struct page *head, pgoff_t index) -{ - /* HugeTLBfs indexes the page cache in units of hpage_size */ - if (PageHuge(head)) - return head->index == index; - return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); -} - #define swapcache_index(folio) __page_file_index(&(folio)->page) /** -- cgit v1.2.3 From 7836d9990079ed611199819ccf487061b748193a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 27 May 2021 12:30:54 -0400 Subject: readahead: Convert page_cache_async_ra() to take a folio Using the folio here avoids checking whether it's a tail page. This patch mostly just enables some of the following patches. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8c2cad7f0c36..30302be6977f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -993,7 +993,7 @@ struct readahead_control { void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); -void page_cache_async_ra(struct readahead_control *, struct page *, +void page_cache_async_ra(struct readahead_control *, struct folio *, unsigned long req_count); void readahead_expand(struct readahead_control *ractl, loff_t new_start, size_t new_len); @@ -1040,7 +1040,7 @@ void page_cache_async_readahead(struct address_space *mapping, struct page *page, pgoff_t index, unsigned long req_count) { DEFINE_READAHEAD(ractl, file, ra, mapping, index); - page_cache_async_ra(&ractl, page, req_count); + page_cache_async_ra(&ractl, page_folio(page), req_count); } static inline struct folio *__readahead_folio(struct readahead_control *ractl) -- cgit v1.2.3 From 539a3322f208db478db88c4a76239476defce6b1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 16 Dec 2020 11:45:30 -0500 Subject: filemap: Add read_cache_folio and read_mapping_folio Reimplement read_cache_page() as a wrapper around read_cache_folio(). Saves over 400 bytes of text from do_read_cache_folio() which more than makes up for the extra 100 bytes of text added to the various wrapper functions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/pagemap.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 30302be6977f..7bef50ea5435 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -629,8 +629,10 @@ static inline struct page *grab_cache_page(struct address_space *mapping, return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); } -extern struct page * read_cache_page(struct address_space *mapping, - pgoff_t index, filler_t *filler, void *data); +struct folio *read_cache_folio(struct address_space *, pgoff_t index, + filler_t *filler, void *data); +struct page *read_cache_page(struct address_space *, pgoff_t index, + filler_t *filler, void *data); extern struct page * read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern int read_cache_pages(struct address_space *mapping, @@ -642,6 +644,12 @@ static inline struct page *read_mapping_page(struct address_space *mapping, return read_cache_page(mapping, index, NULL, data); } +static inline struct folio *read_mapping_folio(struct address_space *mapping, + pgoff_t index, void *data) +{ + return read_cache_folio(mapping, index, NULL, data); +} + /* * Get index of the page within radix-tree (but not for hugetlb pages). * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) -- cgit v1.2.3 From 82c50f8b443359ec99348cd9b1289f55cd47779d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Jul 2021 15:14:48 -0400 Subject: filemap: Add filemap_release_folio() Reimplement try_to_release_page() as a wrapper around filemap_release_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/mm.h | 1 - include/linux/pagemap.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 72ca04f16711..145f045b0ddc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1970,7 +1970,6 @@ int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); -extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned int offset, unsigned int length); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7bef50ea5435..eb6e58e106c8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -939,6 +939,8 @@ static inline void __delete_from_page_cache(struct page *page, void *shadow) void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, struct pagevec *pvec); +int try_to_release_page(struct page *page, gfp_t gfp); +bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); -- cgit v1.2.3 From 77e2a04745ff8e391ad402e2d2d1157a5d3a7ebc Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 4 Jan 2022 19:51:08 +0000 Subject: ACPI: PCC: Implement OperationRegion handler for the PCC Type 3 subtype PCC OpRegion provides a mechanism to communicate with the platform directly from the AML. PCCT provides the list of PCC channel available in the platform, a subset or all of them can be used in PCC Opregion. This patch registers the PCC OpRegion handler before ACPI tables are loaded. This relies on the special context data passed to identify and set up the PCC channel before the OpRegion handler is executed for the first time. Typical PCC Opregion declaration looks like this: OperationRegion (PFRM, PCC, 2, 0x74) Field (PFRM, ByteAcc, NoLock, Preserve) { SIGN, 32, FLGS, 32, LEN, 32, CMD, 32, DATA, 800 } It contains four named double words followed by 100 bytes of buffer names DATA. ASL can fill out the buffer something like: /* Create global or local buffer */ Name (BUFF, Buffer (0x0C){}) /* Create double word fields over the buffer */ CreateDWordField (BUFF, 0x0, WD0) CreateDWordField (BUFF, 0x04, WD1) CreateDWordField (BUFF, 0x08, WD2) /* Fill the named fields */ WD0 = 0x50434300 SIGN = BUFF WD0 = 1 FLGS = BUFF WD0 = 0x10 LEN = BUFF /* Fill the payload in the DATA buffer */ WD0 = 0 WD1 = 0x08 WD2 = 0 DATA = BUFF /* Write to CMD field to trigger handler */ WD0 = 0x4404 CMD = BUFF This buffer is received by acpi_pcc_opregion_space_handler. This handler will fetch the complete buffer via internal_pcc_buffer. The setup handler will receive the special PCC context data which will contain the PCC channel index which used to set up the channel. The buffer pointer and length is saved in region context which is then used in the handler. (kernel test robot: Build failure with CONFIG_ACPI_DEBUGGER) Link: https://lore.kernel.org/r/202201041539.feAV0l27-lkp@intel.com Reported-by: kernel test robot Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b28f8790192a..93eaba2485e3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1389,6 +1389,12 @@ static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) } #endif +#ifdef CONFIG_ACPI_PCC +void acpi_init_pcc(void); +#else +static inline void acpi_init_pcc(void) { } +#endif + #ifdef CONFIG_ACPI extern void acpi_device_notify(struct device *dev); extern void acpi_device_notify_remove(struct device *dev); -- cgit v1.2.3 From 81d3f500ee98cf9f5388f0fb0548ba5a57598827 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 29 Sep 2021 18:17:44 +0900 Subject: scsi: core: Fix scsi_mode_select() interface The modepage argument is unused. Remove it. Link: https://lore.kernel.org/r/20210929091744.706003-3-damien.lemoal@wdc.com Reviewed-by: Himanshu Madhani Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d1c6fc83b1e3..7571e23d8d8a 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -415,9 +415,8 @@ extern int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *); extern int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, - int modepage, unsigned char *buffer, int len, - int timeout, int retries, - struct scsi_mode_data *data, + unsigned char *buffer, int len, int timeout, + int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *); extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, struct scsi_sense_hdr *sshdr); -- cgit v1.2.3 From 403a2e236538c6b479ea5bfc8b75a75540cfba6b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Dec 2021 11:51:34 -0700 Subject: dmaengine: idxd: change MSIX allocation based on per wq activation Change the driver where WQ interrupt is requested only when wq is being enabled. This new scheme set things up so that request_threaded_irq() is only called when a kernel wq type is being enabled. This also sets up for future interrupt request where different interrupt handler such as wq occupancy interrupt can be setup instead of the wq completion interrupt. Not calling request_irq() until the WQ actually needs an irq also prevents wasting of CPU irq vectors on x86 systems, which is a limited resource. idxd_flush_pending_descs() is moved to device.c since descriptor flushing is now part of wq disable rather than shutdown(). Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163942149487.2412839.6691222855803875848.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index c750eac09fc9..a8f0ff75c430 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -28,6 +28,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, + IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 -- cgit v1.2.3 From c206a389c97c9533971cd05eed69b49f535cc193 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:31 +0100 Subject: ata: libata: tracepoints for bus-master DMA Add tracepoints for bus-master DMA and taskfile related functions. That allows us to drop the relevant DPRINTK() calls. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/trace/events/libata.h | 125 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) (limited to 'include') diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index 2394fc2b2831..acb9a4fc18ed 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -151,6 +151,9 @@ const char *libata_trace_parse_status(struct trace_seq*, unsigned char); #define __parse_status(s) libata_trace_parse_status(p, s) +const char *libata_trace_parse_host_stat(struct trace_seq *, unsigned char); +#define __parse_host_stat(s) libata_trace_parse_host_stat(p, s) + const char *libata_trace_parse_eh_action(struct trace_seq *, unsigned int); #define __parse_eh_action(a) libata_trace_parse_eh_action(p, a) @@ -299,6 +302,128 @@ DEFINE_EVENT(ata_qc_complete_template, ata_qc_complete_done, TP_PROTO(struct ata_queued_cmd *qc), TP_ARGS(qc)); +TRACE_EVENT(ata_tf_load, + + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf), + + TP_ARGS(ap, tf), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned char, cmd ) + __field( unsigned char, dev ) + __field( unsigned char, lbal ) + __field( unsigned char, lbam ) + __field( unsigned char, lbah ) + __field( unsigned char, nsect ) + __field( unsigned char, feature ) + __field( unsigned char, hob_lbal ) + __field( unsigned char, hob_lbam ) + __field( unsigned char, hob_lbah ) + __field( unsigned char, hob_nsect ) + __field( unsigned char, hob_feature ) + __field( unsigned char, proto ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + __entry->proto = tf->protocol; + __entry->cmd = tf->command; + __entry->dev = tf->device; + __entry->lbal = tf->lbal; + __entry->lbam = tf->lbam; + __entry->lbah = tf->lbah; + __entry->hob_lbal = tf->hob_lbal; + __entry->hob_lbam = tf->hob_lbam; + __entry->hob_lbah = tf->hob_lbah; + __entry->feature = tf->feature; + __entry->hob_feature = tf->hob_feature; + __entry->nsect = tf->nsect; + __entry->hob_nsect = tf->hob_nsect; + ), + + TP_printk("ata_port=%u proto=%s cmd=%s%s " \ + " tf=(%02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x)", + __entry->ata_port, + show_protocol_name(__entry->proto), + show_opcode_name(__entry->cmd), + __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect), + __entry->cmd, __entry->feature, __entry->nsect, + __entry->lbal, __entry->lbam, __entry->lbah, + __entry->hob_feature, __entry->hob_nsect, + __entry->hob_lbal, __entry->hob_lbam, __entry->hob_lbah, + __entry->dev) +); + +DECLARE_EVENT_CLASS(ata_exec_command_template, + + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + + TP_ARGS(ap, tf, tag), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, tag ) + __field( unsigned char, cmd ) + __field( unsigned char, feature ) + __field( unsigned char, hob_nsect ) + __field( unsigned char, proto ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + __entry->tag = tag; + __entry->proto = tf->protocol; + __entry->cmd = tf->command; + __entry->feature = tf->feature; + __entry->hob_nsect = tf->hob_nsect; + ), + + TP_printk("ata_port=%u tag=%d proto=%s cmd=%s%s", + __entry->ata_port, __entry->tag, + show_protocol_name(__entry->proto), + show_opcode_name(__entry->cmd), + __parse_subcmd(__entry->cmd, __entry->feature, __entry->hob_nsect)) +); + +DEFINE_EVENT(ata_exec_command_template, ata_exec_command, + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + TP_ARGS(ap, tf, tag)); + +DEFINE_EVENT(ata_exec_command_template, ata_bmdma_setup, + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + TP_ARGS(ap, tf, tag)); + +DEFINE_EVENT(ata_exec_command_template, ata_bmdma_start, + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + TP_ARGS(ap, tf, tag)); + +DEFINE_EVENT(ata_exec_command_template, ata_bmdma_stop, + TP_PROTO(struct ata_port *ap, const struct ata_taskfile *tf, unsigned int tag), + TP_ARGS(ap, tf, tag)); + +TRACE_EVENT(ata_bmdma_status, + + TP_PROTO(struct ata_port *ap, unsigned int host_stat), + + TP_ARGS(ap, host_stat), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, tag ) + __field( unsigned char, host_stat ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + __entry->host_stat = host_stat; + ), + + TP_printk("ata_port=%u host_stat=%s", + __entry->ata_port, + __parse_host_stat(__entry->host_stat)) +); + TRACE_EVENT(ata_eh_link_autopsy, TP_PROTO(struct ata_device *dev, unsigned int eh_action, unsigned int eh_err_mask), -- cgit v1.2.3 From 7fad6ad6a357c73f0bdf55476238ae2884de78a3 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:32 +0100 Subject: ata: libata-sff: tracepoints for HSM state machine Add tracepoints for the HSM state machine and drop DPRINTK calls Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/trace/events/libata.h | 125 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) (limited to 'include') diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index acb9a4fc18ed..fcb8fde39614 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -148,6 +148,15 @@ ata_class_name(ATA_DEV_ZAC_UNSUP), \ ata_class_name(ATA_DEV_NONE)) +#define ata_sff_hsm_state_name(state) { state, #state } +#define show_sff_hsm_state_name(val) \ + __print_symbolic(val, \ + ata_sff_hsm_state_name(HSM_ST_IDLE), \ + ata_sff_hsm_state_name(HSM_ST_FIRST), \ + ata_sff_hsm_state_name(HSM_ST), \ + ata_sff_hsm_state_name(HSM_ST_LAST), \ + ata_sff_hsm_state_name(HSM_ST_ERR)) + const char *libata_trace_parse_status(struct trace_seq*, unsigned char); #define __parse_status(s) libata_trace_parse_status(p, s) @@ -163,6 +172,9 @@ const char *libata_trace_parse_eh_err_mask(struct trace_seq *, unsigned int); const char *libata_trace_parse_qc_flags(struct trace_seq *, unsigned int); #define __parse_qc_flags(f) libata_trace_parse_qc_flags(p, f) +const char *libata_trace_parse_tf_flags(struct trace_seq *, unsigned int); +#define __parse_tf_flags(f) libata_trace_parse_tf_flags(p, f) + const char *libata_trace_parse_subcmd(struct trace_seq *, unsigned char, unsigned char, unsigned char); #define __parse_subcmd(c,f,h) libata_trace_parse_subcmd(p, c, f, h) @@ -558,6 +570,119 @@ DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset, TP_PROTO(struct ata_link *link, unsigned int *class, int rc), TP_ARGS(link, class, rc)); +DECLARE_EVENT_CLASS(ata_sff_hsm_template, + + TP_PROTO(struct ata_queued_cmd *qc, unsigned char status), + + TP_ARGS(qc, status), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, ata_dev ) + __field( unsigned int, tag ) + __field( unsigned int, qc_flags ) + __field( unsigned int, protocol ) + __field( unsigned int, hsm_state ) + __field( unsigned char, dev_state ) + ), + + TP_fast_assign( + __entry->ata_port = qc->ap->print_id; + __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno; + __entry->tag = qc->tag; + __entry->qc_flags = qc->flags; + __entry->protocol = qc->tf.protocol; + __entry->hsm_state = qc->ap->hsm_task_state; + __entry->dev_state = status; + ), + + TP_printk("ata_port=%u ata_dev=%u tag=%d proto=%s flags=%s task_state=%s dev_stat=0x%X", + __entry->ata_port, __entry->ata_dev, __entry->tag, + show_protocol_name(__entry->protocol), + __parse_qc_flags(__entry->qc_flags), + show_sff_hsm_state_name(__entry->hsm_state), + __entry->dev_state) +); + +DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_state, + TP_PROTO(struct ata_queued_cmd *qc, unsigned char state), + TP_ARGS(qc, state)); + +DEFINE_EVENT(ata_sff_hsm_template, ata_sff_hsm_command_complete, + TP_PROTO(struct ata_queued_cmd *qc, unsigned char state), + TP_ARGS(qc, state)); + +DEFINE_EVENT(ata_sff_hsm_template, ata_sff_port_intr, + TP_PROTO(struct ata_queued_cmd *qc, unsigned char state), + TP_ARGS(qc, state)); + +DECLARE_EVENT_CLASS(ata_transfer_data_template, + + TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), + + TP_ARGS(qc, offset, count), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, ata_dev ) + __field( unsigned int, tag ) + __field( unsigned int, flags ) + __field( unsigned int, offset ) + __field( unsigned int, bytes ) + ), + + TP_fast_assign( + __entry->ata_port = qc->ap->print_id; + __entry->ata_dev = qc->dev->link->pmp + qc->dev->devno; + __entry->tag = qc->tag; + __entry->flags = qc->tf.flags; + __entry->offset = offset; + __entry->bytes = count; + ), + + TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s offset=%u bytes=%u", + __entry->ata_port, __entry->ata_dev, __entry->tag, + __parse_tf_flags(__entry->flags), + __entry->offset, __entry->bytes) +); + +DEFINE_EVENT(ata_transfer_data_template, ata_sff_pio_transfer_data, + TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), + TP_ARGS(qc, offset, count)); + +DEFINE_EVENT(ata_transfer_data_template, atapi_pio_transfer_data, + TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), + TP_ARGS(qc, offset, count)); + +DEFINE_EVENT(ata_transfer_data_template, atapi_send_cdb, + TP_PROTO(struct ata_queued_cmd *qc, unsigned int offset, unsigned int count), + TP_ARGS(qc, offset, count)); + +DECLARE_EVENT_CLASS(ata_sff_template, + + TP_PROTO(struct ata_port *ap), + + TP_ARGS(ap), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned char, hsm_state ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + __entry->hsm_state = ap->hsm_task_state; + ), + + TP_printk("ata_port=%u task_state=%s", + __entry->ata_port, + show_sff_hsm_state_name(__entry->hsm_state)) +); + +DEFINE_EVENT(ata_sff_template, ata_sff_flush_pio_task, + TP_PROTO(struct ata_port *ap), + TP_ARGS(ap)); + #endif /* _TRACE_LIBATA_H */ /* This part must be outside protection */ -- cgit v1.2.3 From c318458c9359ce3d10943b0c15a9b9f43dda7b2e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:34 +0100 Subject: ata: libata: add tracepoints for ATA error handling Add tracepoints for ATA error handling. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/trace/events/libata.h | 60 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index fcb8fde39614..d4e631aa976f 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -490,6 +490,37 @@ TRACE_EVENT(ata_eh_link_autopsy_qc, __parse_eh_err_mask(__entry->eh_err_mask)) ); +DECLARE_EVENT_CLASS(ata_eh_action_template, + + TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action), + + TP_ARGS(link, devno, eh_action), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + __field( unsigned int, ata_dev ) + __field( unsigned int, eh_action ) + ), + + TP_fast_assign( + __entry->ata_port = link->ap->print_id; + __entry->ata_dev = link->pmp + devno; + __entry->eh_action = eh_action; + ), + + TP_printk("ata_port=%u ata_dev=%u eh_action=%s", + __entry->ata_port, __entry->ata_dev, + __parse_eh_action(__entry->eh_action)) +); + +DEFINE_EVENT(ata_eh_action_template, ata_eh_about_to_do, + TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action), + TP_ARGS(link, devno, eh_action)); + +DEFINE_EVENT(ata_eh_action_template, ata_eh_done, + TP_PROTO(struct ata_link *link, unsigned int devno, unsigned int eh_action), + TP_ARGS(link, devno, eh_action)); + DECLARE_EVENT_CLASS(ata_link_reset_begin_template, TP_PROTO(struct ata_link *link, unsigned int *class, unsigned long deadline), @@ -570,6 +601,35 @@ DEFINE_EVENT(ata_link_reset_end_template, ata_slave_postreset, TP_PROTO(struct ata_link *link, unsigned int *class, int rc), TP_ARGS(link, class, rc)); +DECLARE_EVENT_CLASS(ata_port_eh_begin_template, + + TP_PROTO(struct ata_port *ap), + + TP_ARGS(ap), + + TP_STRUCT__entry( + __field( unsigned int, ata_port ) + ), + + TP_fast_assign( + __entry->ata_port = ap->print_id; + ), + + TP_printk("ata_port=%u", __entry->ata_port) +); + +DEFINE_EVENT(ata_port_eh_begin_template, ata_std_sched_eh, + TP_PROTO(struct ata_port *ap), + TP_ARGS(ap)); + +DEFINE_EVENT(ata_port_eh_begin_template, ata_port_freeze, + TP_PROTO(struct ata_port *ap), + TP_ARGS(ap)); + +DEFINE_EVENT(ata_port_eh_begin_template, ata_port_thaw, + TP_PROTO(struct ata_port *ap), + TP_ARGS(ap)); + DECLARE_EVENT_CLASS(ata_sff_hsm_template, TP_PROTO(struct ata_queued_cmd *qc, unsigned char status), -- cgit v1.2.3 From 742bef476ca5352b16063161fb73a56629a6d995 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:35 +0100 Subject: ata: libata: move ata_{port,link,dev}_dbg to standard pr_XXX() macros Use standard pr_{debug,info,notice,warn,err} macros instead of the hand-crafted printk helpers. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 69 +++++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 235fdbeb19ea..39cdde0b9491 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1489,51 +1489,61 @@ static inline int sata_srst_pmp(struct ata_link *link) return link->pmp; } -/* - * printk helpers - */ -__printf(3, 4) -void ata_port_printk(const struct ata_port *ap, const char *level, - const char *fmt, ...); -__printf(3, 4) -void ata_link_printk(const struct ata_link *link, const char *level, - const char *fmt, ...); -__printf(3, 4) -void ata_dev_printk(const struct ata_device *dev, const char *level, - const char *fmt, ...); +#define ata_port_printk(level, ap, fmt, ...) \ + pr_ ## level ("ata%u: " fmt, (ap)->print_id, ##__VA_ARGS__) #define ata_port_err(ap, fmt, ...) \ - ata_port_printk(ap, KERN_ERR, fmt, ##__VA_ARGS__) + ata_port_printk(err, ap, fmt, ##__VA_ARGS__) #define ata_port_warn(ap, fmt, ...) \ - ata_port_printk(ap, KERN_WARNING, fmt, ##__VA_ARGS__) + ata_port_printk(warn, ap, fmt, ##__VA_ARGS__) #define ata_port_notice(ap, fmt, ...) \ - ata_port_printk(ap, KERN_NOTICE, fmt, ##__VA_ARGS__) + ata_port_printk(notice, ap, fmt, ##__VA_ARGS__) #define ata_port_info(ap, fmt, ...) \ - ata_port_printk(ap, KERN_INFO, fmt, ##__VA_ARGS__) + ata_port_printk(info, ap, fmt, ##__VA_ARGS__) #define ata_port_dbg(ap, fmt, ...) \ - ata_port_printk(ap, KERN_DEBUG, fmt, ##__VA_ARGS__) + ata_port_printk(debug, ap, fmt, ##__VA_ARGS__) + +#define ata_link_printk(level, link, fmt, ...) \ +do { \ + if (sata_pmp_attached((link)->ap) || \ + (link)->ap->slave_link) \ + pr_ ## level ("ata%u.%02u: " fmt, \ + (link)->ap->print_id, \ + (link)->pmp, \ + ##__VA_ARGS__); \ + else \ + pr_ ## level ("ata%u: " fmt, \ + (link)->ap->print_id, \ + ##__VA_ARGS__); \ +} while (0) #define ata_link_err(link, fmt, ...) \ - ata_link_printk(link, KERN_ERR, fmt, ##__VA_ARGS__) + ata_link_printk(err, link, fmt, ##__VA_ARGS__) #define ata_link_warn(link, fmt, ...) \ - ata_link_printk(link, KERN_WARNING, fmt, ##__VA_ARGS__) + ata_link_printk(warn, link, fmt, ##__VA_ARGS__) #define ata_link_notice(link, fmt, ...) \ - ata_link_printk(link, KERN_NOTICE, fmt, ##__VA_ARGS__) + ata_link_printk(notice, link, fmt, ##__VA_ARGS__) #define ata_link_info(link, fmt, ...) \ - ata_link_printk(link, KERN_INFO, fmt, ##__VA_ARGS__) + ata_link_printk(info, link, fmt, ##__VA_ARGS__) #define ata_link_dbg(link, fmt, ...) \ - ata_link_printk(link, KERN_DEBUG, fmt, ##__VA_ARGS__) + ata_link_printk(debug, link, fmt, ##__VA_ARGS__) + +#define ata_dev_printk(level, dev, fmt, ...) \ + pr_ ## level("ata%u.%02u: " fmt, \ + (dev)->link->ap->print_id, \ + (dev)->link->pmp + (dev)->devno, \ + ##__VA_ARGS__) #define ata_dev_err(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_ERR, fmt, ##__VA_ARGS__) + ata_dev_printk(err, dev, fmt, ##__VA_ARGS__) #define ata_dev_warn(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_WARNING, fmt, ##__VA_ARGS__) + ata_dev_printk(warn, dev, fmt, ##__VA_ARGS__) #define ata_dev_notice(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_NOTICE, fmt, ##__VA_ARGS__) + ata_dev_printk(notice, dev, fmt, ##__VA_ARGS__) #define ata_dev_info(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__) + ata_dev_printk(info, dev, fmt, ##__VA_ARGS__) #define ata_dev_dbg(dev, fmt, ...) \ - ata_dev_printk(dev, KERN_DEBUG, fmt, ##__VA_ARGS__) + ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__) void ata_print_version(const struct device *dev, const char *version); @@ -2067,11 +2077,8 @@ static inline u8 ata_wait_idle(struct ata_port *ap) { u8 status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000); -#ifdef ATA_DEBUG if (status != 0xff && (status & (ATA_BUSY | ATA_DRQ))) - ata_port_printk(ap, KERN_DEBUG, "abnormal Status 0x%X\n", - status); -#endif + ata_port_dbg(ap, "abnormal Status 0x%X\n", status); return status; } -- cgit v1.2.3 From d97c75edd806669c9f4b56c0ddae37725c0b708c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:01 +0100 Subject: ata: libata: drop ata_msg_error() and ata_msg_intr() Unused. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 39cdde0b9491..4f0a85f4e69a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -78,8 +78,6 @@ enum { ATA_MSG_WARN = 0x0008, ATA_MSG_MALLOC = 0x0010, ATA_MSG_CTL = 0x0020, - ATA_MSG_INTR = 0x0040, - ATA_MSG_ERR = 0x0080, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) @@ -88,8 +86,6 @@ enum { #define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN) #define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC) #define ata_msg_ctl(p) ((p)->msg_enable & ATA_MSG_CTL) -#define ata_msg_intr(p) ((p)->msg_enable & ATA_MSG_INTR) -#define ata_msg_err(p) ((p)->msg_enable & ATA_MSG_ERR) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { -- cgit v1.2.3 From 5cef96b4207e01c9cdb7752acaa178056fe94632 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:02 +0100 Subject: ata: libata: drop ata_msg_ctl() The one caller have been converted to dynamic debugging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 4f0a85f4e69a..e384cce62963 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -77,7 +77,6 @@ enum { ATA_MSG_PROBE = 0x0004, ATA_MSG_WARN = 0x0008, ATA_MSG_MALLOC = 0x0010, - ATA_MSG_CTL = 0x0020, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) @@ -85,7 +84,6 @@ enum { #define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE) #define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN) #define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC) -#define ata_msg_ctl(p) ((p)->msg_enable & ATA_MSG_CTL) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { -- cgit v1.2.3 From 2f784b923d50cdef1f6bd24d7c18614321b0833a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:03 +0100 Subject: ata: libata: drop ata_msg_malloc() Unused. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index e384cce62963..5651bbf4902b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -76,14 +76,12 @@ enum { ATA_MSG_INFO = 0x0002, ATA_MSG_PROBE = 0x0004, ATA_MSG_WARN = 0x0008, - ATA_MSG_MALLOC = 0x0010, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) #define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO) #define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE) #define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN) -#define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { -- cgit v1.2.3 From 16d424672716dc886fb58ec4a47a408db4781cc0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:04 +0100 Subject: ata: libata: drop ata_msg_warn() The WARN level was always enabled, so drop ata_msg_warn(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 5651bbf4902b..0e5ed2ff94be 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -75,13 +75,11 @@ enum { ATA_MSG_DRV = 0x0001, ATA_MSG_INFO = 0x0002, ATA_MSG_PROBE = 0x0004, - ATA_MSG_WARN = 0x0008, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) #define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO) #define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE) -#define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { -- cgit v1.2.3 From 17a1e1be2fc7dc99945b41df0485037dcb6044d0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:05 +0100 Subject: ata: libata: drop ata_msg_probe() All callsites have been converted to dynamic debugging. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0e5ed2ff94be..455d7e77e562 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -74,12 +74,10 @@ enum { ATA_MSG_DRV = 0x0001, ATA_MSG_INFO = 0x0002, - ATA_MSG_PROBE = 0x0004, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) #define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO) -#define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { -- cgit v1.2.3 From 96c810f216cb6da15bfa8fe8ef3bf73ca91c5dd8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:06 +0100 Subject: ata: libata: drop ata_msg_info() Convert the sole caller to ata_dev_dbg() and remove the definition. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 455d7e77e562..524d09b1dc82 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -73,11 +73,9 @@ enum { ATA_MSG_DRV = 0x0001, - ATA_MSG_INFO = 0x0002, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) -#define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { -- cgit v1.2.3 From 1c95a27c1e544f723f6e0e5a4384098f92996ec0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:07 +0100 Subject: ata: libata: drop ata_msg_drv() Callers are already protected by ata_dev_print_info(), so no need to have an additional configuration parameter here. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 524d09b1dc82..65172609a005 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -71,12 +71,6 @@ /* NEW: debug levels */ #define HAVE_LIBATA_MSG 1 -enum { - ATA_MSG_DRV = 0x0001, -}; - -#define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) - static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { if (dval < 0 || dval >= (sizeof(u32) * 8)) -- cgit v1.2.3 From db45905e74e6ae035305719bc683eca40f526669 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:08 +0100 Subject: ata: libata: remove 'new' ata message handling Remove the remaining bits for the 'new' ata message handling. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 65172609a005..145c0132b75e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -68,18 +68,6 @@ } \ }) -/* NEW: debug levels */ -#define HAVE_LIBATA_MSG 1 - -static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) -{ - if (dval < 0 || dval >= (sizeof(u32) * 8)) - return default_msg_enable_bits; /* should be 0x1 - only driver info msgs */ - if (!dval) - return 0; - return (1 << dval) - 1; -} - /* defines only for the constants which don't work well as enums */ #define ATA_TAG_POISON 0xfafbfcfdU @@ -864,7 +852,6 @@ struct ata_port { unsigned int hsm_task_state; - u32 msg_enable; struct list_head eh_done_q; wait_queue_head_t eh_wait_q; int eh_tries; -- cgit v1.2.3 From 870bb833c0acb29d8471eac5c2d2e6274826dbb6 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:21:09 +0100 Subject: ata: libata: remove debug compilation switches Unused now, so remove and drop any references to them. Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- include/linux/libata.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 145c0132b75e..c258f69106f4 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -39,25 +39,9 @@ * compile-time options: to be removed as soon as all the drivers are * converted to the new debugging mechanism */ -#undef ATA_DEBUG /* debugging output */ -#undef ATA_VERBOSE_DEBUG /* yet more debugging output */ #undef ATA_IRQ_TRAP /* define to ack screaming irqs */ -#undef ATA_NDEBUG /* define to disable quick runtime checks */ -/* note: prints function name for you */ -#ifdef ATA_DEBUG -#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) -#ifdef ATA_VERBOSE_DEBUG -#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) -#else -#define VPRINTK(fmt, args...) -#endif /* ATA_VERBOSE_DEBUG */ -#else -#define DPRINTK(fmt, args...) -#define VPRINTK(fmt, args...) -#endif /* ATA_DEBUG */ - #define ata_print_version_once(dev, version) \ ({ \ static bool __print_once; \ -- cgit v1.2.3 From cc4b08c31b5c51352f258032cc65e884b3e61e6a Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 7 Dec 2021 21:15:31 +0900 Subject: can: do not increase tx_bytes statistics for RTR frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The actual payload length of the CAN Remote Transmission Request (RTR) frames is always 0, i.e. no payload is transmitted on the wire. However, those RTR frames still use the DLC to indicate the length of the requested frame. As such, net_device_stats::tx_bytes should not be increased when sending RTR frames. The function can_get_echo_skb() already returns the correct length, even for RTR frames (c.f. [1]). However, for historical reasons, the drivers do not use can_get_echo_skb()'s return value and instead, most of them store a temporary length (or dlc) in some local structure or array. Using the return value of can_get_echo_skb() solves the issue. After doing this, such length/dlc fields become unused and so this patch does the adequate cleaning when needed. This patch fixes all the CAN drivers. Finally, can_get_echo_skb() is decorated with the __must_check attribute in order to force future drivers to correctly use its return value (else the compiler would emit a warning). [1] commit ed3320cec279 ("can: dev: __can_get_echo_skb(): fix real payload length return value for RTR frames") Link: https://lore.kernel.org/all/20211207121531.42941-6-mailhol.vincent@wanadoo.fr Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Ludovic Desroches Cc: Maxime Ripard Cc: Chen-Yu Tsai Cc: Jernej Skrabec Cc: Yasushi SHOJI Cc: Oliver Hartkopp Cc: Stephane Grosjean Cc: Andreas Larsson Tested-by: Jimmy Assarsson # kvaser Signed-off-by: Vincent Mailhol Acked-by: Stefan Mätje # esd_usb2 Tested-by: Stefan Mätje # esd_usb2 [mkl: add conversion for grcan] Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index d311bc369a39..fdb22b00674a 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -21,8 +21,9 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx, unsigned int frame_len); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, unsigned int *frame_len_ptr); -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx, - unsigned int *frame_len_ptr); +unsigned int __must_check can_get_echo_skb(struct net_device *dev, + unsigned int idx, + unsigned int *frame_len_ptr); void can_free_echo_skb(struct net_device *dev, unsigned int idx, unsigned int *frame_len_ptr); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); -- cgit v1.2.3 From c9e1d8ed304cc6106c3241add170193995953325 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 14 Dec 2021 01:02:23 +0900 Subject: can: dev: replace can_priv::ctrlmode_static by can_get_static_ctrlmode() The statically enabled features of a CAN controller can be retrieved using below formula: | u32 ctrlmode_static = priv->ctrlmode & ~priv->ctrlmode_supported; As such, there is no need to store this information. This patch remove the field ctrlmode_static of struct can_priv and provides, in replacement, the inline function can_get_static_ctrlmode() which returns the same value. Link: https://lore.kernel.org/all/20211213160226.56219-2-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 45f19d9db5ca..92e2d69462f0 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -69,7 +69,6 @@ struct can_priv { /* CAN controller features - see include/uapi/linux/can/netlink.h */ u32 ctrlmode; /* current options setting */ u32 ctrlmode_supported; /* options that can be modified by netlink */ - u32 ctrlmode_static; /* static enabled options for driver/hardware */ int restart_ms; struct delayed_work restart_work; @@ -139,13 +138,17 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, /* alloc_candev() succeeded => netdev_priv() is valid at this point */ priv->ctrlmode = static_mode; - priv->ctrlmode_static = static_mode; /* override MTU which was set by default in can_setup()? */ if (static_mode & CAN_CTRLMODE_FD) dev->mtu = CANFD_MTU; } +static inline u32 can_get_static_ctrlmode(struct can_priv *priv) +{ + return priv->ctrlmode & ~priv->ctrlmode_supported; +} + void can_setup(struct net_device *dev); struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, -- cgit v1.2.3 From 7d4a101c0bd3c6e5c6e45c705a54f7bc8f6c128d Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 14 Dec 2021 01:02:24 +0900 Subject: can: dev: add sanity check in can_set_static_ctrlmode() Previous patch removed can_priv::ctrlmode_static to replace it with can_get_static_ctrlmode(). A condition sine qua non for this to work is that the controller static modes should never be set in can_priv::ctrlmode_supported (c.f. the comment on can_priv::ctrlmode_supported which states that it is for "options that can be *modified* by netlink"). Also, this condition is already correctly fulfilled by all existing drivers which rely on the ctrlmode_static feature. Nonetheless, we added an extra safeguard in can_set_static_ctrlmode() to return an error value and to warn the developer who would be adventurous enough to set to static a given feature that is already set to supported. The drivers which rely on the static controller mode are then updated to check the return value of can_set_static_ctrlmode(). Link: https://lore.kernel.org/all/20211213160226.56219-3-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 92e2d69462f0..fff3f70df697 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -131,17 +131,24 @@ static inline s32 can_get_relative_tdco(const struct can_priv *priv) } /* helper to define static CAN controller features at device creation time */ -static inline void can_set_static_ctrlmode(struct net_device *dev, - u32 static_mode) +static inline int __must_check can_set_static_ctrlmode(struct net_device *dev, + u32 static_mode) { struct can_priv *priv = netdev_priv(dev); /* alloc_candev() succeeded => netdev_priv() is valid at this point */ + if (priv->ctrlmode_supported & static_mode) { + netdev_warn(dev, + "Controller features can not be supported and static at the same time\n"); + return -EINVAL; + } priv->ctrlmode = static_mode; /* override MTU which was set by default in can_setup()? */ if (static_mode & CAN_CTRLMODE_FD) dev->mtu = CANFD_MTU; + + return 0; } static inline u32 can_get_static_ctrlmode(struct can_priv *priv) -- cgit v1.2.3 From 5fe1be81efd28bf2afcac218f23118dca6b1b648 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 14 Dec 2021 01:02:25 +0900 Subject: can: dev: reorder struct can_priv members for better packing Save eight bytes of holes on x86-64 architectures by reordering the members of struct can_priv. Before: | $ pahole -C can_priv drivers/net/can/dev/dev.o | struct can_priv { | struct net_device * dev; /* 0 8 */ | struct can_device_stats can_stats; /* 8 24 */ | const struct can_bittiming_const * bittiming_const; /* 32 8 */ | const struct can_bittiming_const * data_bittiming_const; /* 40 8 */ | struct can_bittiming bittiming; /* 48 32 */ | /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */ | struct can_bittiming data_bittiming; /* 80 32 */ | const struct can_tdc_const * tdc_const; /* 112 8 */ | struct can_tdc tdc; /* 120 12 */ | /* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */ | unsigned int bitrate_const_cnt; /* 132 4 */ | const u32 * bitrate_const; /* 136 8 */ | const u32 * data_bitrate_const; /* 144 8 */ | unsigned int data_bitrate_const_cnt; /* 152 4 */ | u32 bitrate_max; /* 156 4 */ | struct can_clock clock; /* 160 4 */ | unsigned int termination_const_cnt; /* 164 4 */ | const u16 * termination_const; /* 168 8 */ | u16 termination; /* 176 2 */ | | /* XXX 6 bytes hole, try to pack */ | | struct gpio_desc * termination_gpio; /* 184 8 */ | /* --- cacheline 3 boundary (192 bytes) --- */ | u16 termination_gpio_ohms[2]; /* 192 4 */ | enum can_state state; /* 196 4 */ | u32 ctrlmode; /* 200 4 */ | u32 ctrlmode_supported; /* 204 4 */ | int restart_ms; /* 208 4 */ | | /* XXX 4 bytes hole, try to pack */ | | struct delayed_work restart_work; /* 216 88 */ | | /* XXX last struct has 4 bytes of padding */ | | /* --- cacheline 4 boundary (256 bytes) was 48 bytes ago --- */ | int (*do_set_bittiming)(struct net_device *); /* 304 8 */ | int (*do_set_data_bittiming)(struct net_device *); /* 312 8 */ | /* --- cacheline 5 boundary (320 bytes) --- */ | int (*do_set_mode)(struct net_device *, enum can_mode); /* 320 8 */ | int (*do_set_termination)(struct net_device *, u16); /* 328 8 */ | int (*do_get_state)(const struct net_device *, enum can_state *); /* 336 8 */ | int (*do_get_berr_counter)(const struct net_device *, struct can_berr_counter *); /* 344 8 */ | unsigned int echo_skb_max; /* 352 4 */ | | /* XXX 4 bytes hole, try to pack */ | | struct sk_buff * * echo_skb; /* 360 8 */ | | /* size: 368, cachelines: 6, members: 32 */ | /* sum members: 354, holes: 3, sum holes: 14 */ | /* paddings: 1, sum paddings: 4 */ | /* last cacheline: 48 bytes */ | }; After: | $ pahole -C can_priv drivers/net/can/dev/dev.o | struct can_priv { | struct net_device * dev; /* 0 8 */ | struct can_device_stats can_stats; /* 8 24 */ | const struct can_bittiming_const * bittiming_const; /* 32 8 */ | const struct can_bittiming_const * data_bittiming_const; /* 40 8 */ | struct can_bittiming bittiming; /* 48 32 */ | /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */ | struct can_bittiming data_bittiming; /* 80 32 */ | const struct can_tdc_const * tdc_const; /* 112 8 */ | struct can_tdc tdc; /* 120 12 */ | /* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */ | unsigned int bitrate_const_cnt; /* 132 4 */ | const u32 * bitrate_const; /* 136 8 */ | const u32 * data_bitrate_const; /* 144 8 */ | unsigned int data_bitrate_const_cnt; /* 152 4 */ | u32 bitrate_max; /* 156 4 */ | struct can_clock clock; /* 160 4 */ | unsigned int termination_const_cnt; /* 164 4 */ | const u16 * termination_const; /* 168 8 */ | u16 termination; /* 176 2 */ | | /* XXX 6 bytes hole, try to pack */ | | struct gpio_desc * termination_gpio; /* 184 8 */ | /* --- cacheline 3 boundary (192 bytes) --- */ | u16 termination_gpio_ohms[2]; /* 192 4 */ | unsigned int echo_skb_max; /* 196 4 */ | struct sk_buff * * echo_skb; /* 200 8 */ | enum can_state state; /* 208 4 */ | u32 ctrlmode; /* 212 4 */ | u32 ctrlmode_supported; /* 216 4 */ | int restart_ms; /* 220 4 */ | struct delayed_work restart_work; /* 224 88 */ | | /* XXX last struct has 4 bytes of padding */ | | /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */ | int (*do_set_bittiming)(struct net_device *); /* 312 8 */ | /* --- cacheline 5 boundary (320 bytes) --- */ | int (*do_set_data_bittiming)(struct net_device *); /* 320 8 */ | int (*do_set_mode)(struct net_device *, enum can_mode); /* 328 8 */ | int (*do_set_termination)(struct net_device *, u16); /* 336 8 */ | int (*do_get_state)(const struct net_device *, enum can_state *); /* 344 8 */ | int (*do_get_berr_counter)(const struct net_device *, struct can_berr_counter *); /* 352 8 */ | | /* size: 360, cachelines: 6, members: 32 */ | /* sum members: 354, holes: 1, sum holes: 6 */ | /* paddings: 1, sum paddings: 4 */ | /* last cacheline: 40 bytes */ | }; Link: https://lore.kernel.org/all/20211213160226.56219-4-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index fff3f70df697..c2ea47f30046 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -64,6 +64,9 @@ struct can_priv { struct gpio_desc *termination_gpio; u16 termination_gpio_ohms[CAN_TERMINATION_GPIO_MAX]; + unsigned int echo_skb_max; + struct sk_buff **echo_skb; + enum can_state state; /* CAN controller features - see include/uapi/linux/can/netlink.h */ @@ -83,9 +86,6 @@ struct can_priv { struct can_berr_counter *bec); int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); - unsigned int echo_skb_max; - struct sk_buff **echo_skb; - #ifdef CONFIG_CAN_LEDS struct led_trigger *tx_led_trig; char tx_led_trig_name[CAN_LED_NAME_SZ]; -- cgit v1.2.3 From 383f0993fc77152b0773c85ed69d6734baf9cb48 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 14 Dec 2021 01:02:26 +0900 Subject: can: netlink: report the CAN controller mode supported flags Currently, the CAN netlink interface provides no easy ways to check the capabilities of a given controller. The only method from the command line is to try each CAN_CTRLMODE_* individually to check whether the netlink interface returns an -EOPNOTSUPP error or not (alternatively, one may find it easier to directly check the source code of the driver instead...) This patch introduces a method for the user to check both the supported and the static capabilities. The proposed method introduces a new IFLA nest: IFLA_CAN_CTRLMODE_EXT which extends the current IFLA_CAN_CTRLMODE. This is done to guaranty a full forward and backward compatibility between the kernel and the user land applications. The IFLA_CAN_CTRLMODE_EXT nest contains one single entry: IFLA_CAN_CTRLMODE_SUPPORTED. Because this entry is only used in one direction: kernel to userland, no new struct nla_policy are introduced. Below table explains how IFLA_CAN_CTRLMODE_SUPPORTED (hereafter: "supported") and can_ctrlmode::flags (hereafter: "flags") allow us to identify both the supported and the static capabilities, when masked with any of the CAN_CTRLMODE_* bit flags: supported & flags & Controller capabilities CAN_CTRLMODE_* CAN_CTRLMODE_* ----------------------------------------------------------------------- false false Feature not supported (always disabled) false true Static feature (always enabled) true false Feature supported but disabled true true Feature supported and enabled Link: https://lore.kernel.org/all/20211213160226.56219-5-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 75b85c60efb2..02ec32d69474 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -137,6 +137,7 @@ enum { IFLA_CAN_DATA_BITRATE_CONST, IFLA_CAN_BITRATE_MAX, IFLA_CAN_TDC, + IFLA_CAN_CTRLMODE_EXT, /* add new constants above here */ __IFLA_CAN_MAX, @@ -166,6 +167,18 @@ enum { IFLA_CAN_TDC_MAX = __IFLA_CAN_TDC - 1 }; +/* + * IFLA_CAN_CTRLMODE_EXT nest: controller mode extended parameters + */ +enum { + IFLA_CAN_CTRLMODE_UNSPEC, + IFLA_CAN_CTRLMODE_SUPPORTED, /* u32 */ + + /* add new constants above here */ + __IFLA_CAN_CTRLMODE, + IFLA_CAN_CTRLMODE_MAX = __IFLA_CAN_CTRLMODE - 1 +}; + /* u16 termination range: 1..65535 Ohms */ #define CAN_TERMINATION_DISABLED 0 -- cgit v1.2.3 From c6af53f038aa32cec12e8a305ba07c7ef168f1b0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 4 Jan 2022 12:07:00 +0000 Subject: net: mdio: add helpers to extract clause 45 regad and devad fields Add a couple of helpers and definitions to extract the clause 45 regad and devad fields from the regnum passed into MDIO drivers. Tested-by: Daniel Golle Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Signed-off-by: Daniel Golle Signed-off-by: David S. Miller --- include/linux/mdio.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 9f3587a61e14..ecac96d52e01 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -7,6 +7,7 @@ #define __LINUX_MDIO_H__ #include +#include #include /* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit @@ -14,6 +15,7 @@ */ #define MII_ADDR_C45 (1<<30) #define MII_DEVADDR_C45_SHIFT 16 +#define MII_DEVADDR_C45_MASK GENMASK(20, 16) #define MII_REGADDR_C45_MASK GENMASK(15, 0) struct gpio_desc; @@ -381,6 +383,16 @@ static inline u32 mdiobus_c45_addr(int devad, u16 regnum) return MII_ADDR_C45 | devad << MII_DEVADDR_C45_SHIFT | regnum; } +static inline u16 mdiobus_c45_regad(u32 regnum) +{ + return FIELD_GET(MII_REGADDR_C45_MASK, regnum); +} + +static inline u16 mdiobus_c45_devad(u32 regnum) +{ + return FIELD_GET(MII_DEVADDR_C45_MASK, regnum); +} + static inline int __mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad, u16 regnum) { -- cgit v1.2.3 From b08db33dabd18c5bd9a419b46302c186801eab10 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 5 Jan 2022 15:21:35 +0200 Subject: net: dsa: move dsa_port :: stp_state near dsa_port :: mac The MAC address of a port is 6 octets in size, and this creates a 2 octet hole after it. There are some other u8 members of struct dsa_port that we can put in that hole. One such member is the stp_state. Before: pahole -C dsa_port net/dsa/slave.o struct dsa_port { union { struct net_device * master; /* 0 8 */ struct net_device * slave; /* 0 8 */ }; /* 0 8 */ const struct dsa_device_ops * tag_ops; /* 8 8 */ struct dsa_switch_tree * dst; /* 16 8 */ struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU = 1, DSA_PORT_TYPE_DSA = 2, DSA_PORT_TYPE_USER = 3, } type; /* 32 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_switch * ds; /* 40 8 */ unsigned int index; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ const char * name; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dsa_port * cpu_dp; /* 64 8 */ u8 mac[6]; /* 72 6 */ /* XXX 2 bytes hole, try to pack */ struct device_node * dn; /* 80 8 */ unsigned int ageing_time; /* 88 4 */ bool vlan_filtering; /* 92 1 */ bool learning; /* 93 1 */ u8 stp_state; /* 94 1 */ /* XXX 1 byte hole, try to pack */ struct dsa_bridge * bridge; /* 96 8 */ struct devlink_port devlink_port; /* 104 288 */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ bool devlink_port_setup; /* 392 1 */ /* XXX 7 bytes hole, try to pack */ struct phylink * pl; /* 400 8 */ struct phylink_config pl_config; /* 408 40 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct net_device * lag_dev; /* 448 8 */ bool lag_tx_enabled; /* 456 1 */ /* XXX 7 bytes hole, try to pack */ struct net_device * hsr_dev; /* 464 8 */ struct list_head list; /* 472 16 */ const struct ethtool_ops * orig_ethtool_ops; /* 488 8 */ const struct dsa_netdevice_ops * netdev_ops; /* 496 8 */ struct mutex addr_lists_lock; /* 504 32 */ /* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */ struct list_head fdbs; /* 536 16 */ struct list_head mdbs; /* 552 16 */ bool setup; /* 568 1 */ /* size: 576, cachelines: 9, members: 30 */ /* sum members: 544, holes: 6, sum holes: 25 */ /* padding: 7 */ }; After: pahole -C dsa_port net/dsa/slave.o struct dsa_port { union { struct net_device * master; /* 0 8 */ struct net_device * slave; /* 0 8 */ }; /* 0 8 */ const struct dsa_device_ops * tag_ops; /* 8 8 */ struct dsa_switch_tree * dst; /* 16 8 */ struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU = 1, DSA_PORT_TYPE_DSA = 2, DSA_PORT_TYPE_USER = 3, } type; /* 32 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_switch * ds; /* 40 8 */ unsigned int index; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ const char * name; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dsa_port * cpu_dp; /* 64 8 */ u8 mac[6]; /* 72 6 */ u8 stp_state; /* 78 1 */ /* XXX 1 byte hole, try to pack */ struct device_node * dn; /* 80 8 */ unsigned int ageing_time; /* 88 4 */ bool vlan_filtering; /* 92 1 */ bool learning; /* 93 1 */ /* XXX 2 bytes hole, try to pack */ struct dsa_bridge * bridge; /* 96 8 */ struct devlink_port devlink_port; /* 104 288 */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ bool devlink_port_setup; /* 392 1 */ /* XXX 7 bytes hole, try to pack */ struct phylink * pl; /* 400 8 */ struct phylink_config pl_config; /* 408 40 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct net_device * lag_dev; /* 448 8 */ bool lag_tx_enabled; /* 456 1 */ /* XXX 7 bytes hole, try to pack */ struct net_device * hsr_dev; /* 464 8 */ struct list_head list; /* 472 16 */ const struct ethtool_ops * orig_ethtool_ops; /* 488 8 */ const struct dsa_netdevice_ops * netdev_ops; /* 496 8 */ struct mutex addr_lists_lock; /* 504 32 */ /* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */ struct list_head fdbs; /* 536 16 */ struct list_head mdbs; /* 552 16 */ bool setup; /* 568 1 */ /* size: 576, cachelines: 9, members: 30 */ /* sum members: 544, holes: 6, sum holes: 25 */ /* padding: 7 */ }; Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index f16959444ae1..8878f9ce251b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -258,12 +258,14 @@ struct dsa_port { const char *name; struct dsa_port *cpu_dp; u8 mac[ETH_ALEN]; + + u8 stp_state; + struct device_node *dn; unsigned int ageing_time; bool vlan_filtering; /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ bool learning; - u8 stp_state; struct dsa_bridge *bridge; struct devlink_port devlink_port; bool devlink_port_setup; -- cgit v1.2.3 From bde82f389af1225df0798851eedbcd03445530d9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 5 Jan 2022 15:21:36 +0200 Subject: net: dsa: merge all bools of struct dsa_port into a single u8 struct dsa_port has 5 bool members which create quite a number of 7 byte holes in the structure layout. By merging them all into bitfields of an u8, and placing that u8 in the 1-byte hole after dp->mac and dp->stp_state, we can reduce the structure size from 576 bytes to 552 bytes on arm64. Before: pahole -C dsa_port net/dsa/slave.o struct dsa_port { union { struct net_device * master; /* 0 8 */ struct net_device * slave; /* 0 8 */ }; /* 0 8 */ const struct dsa_device_ops * tag_ops; /* 8 8 */ struct dsa_switch_tree * dst; /* 16 8 */ struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU = 1, DSA_PORT_TYPE_DSA = 2, DSA_PORT_TYPE_USER = 3, } type; /* 32 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_switch * ds; /* 40 8 */ unsigned int index; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ const char * name; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dsa_port * cpu_dp; /* 64 8 */ u8 mac[6]; /* 72 6 */ u8 stp_state; /* 78 1 */ /* XXX 1 byte hole, try to pack */ struct device_node * dn; /* 80 8 */ unsigned int ageing_time; /* 88 4 */ bool vlan_filtering; /* 92 1 */ bool learning; /* 93 1 */ /* XXX 2 bytes hole, try to pack */ struct dsa_bridge * bridge; /* 96 8 */ struct devlink_port devlink_port; /* 104 288 */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ bool devlink_port_setup; /* 392 1 */ /* XXX 7 bytes hole, try to pack */ struct phylink * pl; /* 400 8 */ struct phylink_config pl_config; /* 408 40 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct net_device * lag_dev; /* 448 8 */ bool lag_tx_enabled; /* 456 1 */ /* XXX 7 bytes hole, try to pack */ struct net_device * hsr_dev; /* 464 8 */ struct list_head list; /* 472 16 */ const struct ethtool_ops * orig_ethtool_ops; /* 488 8 */ const struct dsa_netdevice_ops * netdev_ops; /* 496 8 */ struct mutex addr_lists_lock; /* 504 32 */ /* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */ struct list_head fdbs; /* 536 16 */ struct list_head mdbs; /* 552 16 */ bool setup; /* 568 1 */ /* size: 576, cachelines: 9, members: 30 */ /* sum members: 544, holes: 6, sum holes: 25 */ /* padding: 7 */ }; After: pahole -C dsa_port net/dsa/slave.o struct dsa_port { union { struct net_device * master; /* 0 8 */ struct net_device * slave; /* 0 8 */ }; /* 0 8 */ const struct dsa_device_ops * tag_ops; /* 8 8 */ struct dsa_switch_tree * dst; /* 16 8 */ struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU = 1, DSA_PORT_TYPE_DSA = 2, DSA_PORT_TYPE_USER = 3, } type; /* 32 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_switch * ds; /* 40 8 */ unsigned int index; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ const char * name; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dsa_port * cpu_dp; /* 64 8 */ u8 mac[6]; /* 72 6 */ u8 stp_state; /* 78 1 */ u8 vlan_filtering:1; /* 79: 0 1 */ u8 learning:1; /* 79: 1 1 */ u8 lag_tx_enabled:1; /* 79: 2 1 */ u8 devlink_port_setup:1; /* 79: 3 1 */ u8 setup:1; /* 79: 4 1 */ /* XXX 3 bits hole, try to pack */ struct device_node * dn; /* 80 8 */ unsigned int ageing_time; /* 88 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_bridge * bridge; /* 96 8 */ struct devlink_port devlink_port; /* 104 288 */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ struct phylink * pl; /* 392 8 */ struct phylink_config pl_config; /* 400 40 */ struct net_device * lag_dev; /* 440 8 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct net_device * hsr_dev; /* 448 8 */ struct list_head list; /* 456 16 */ const struct ethtool_ops * orig_ethtool_ops; /* 472 8 */ const struct dsa_netdevice_ops * netdev_ops; /* 480 8 */ struct mutex addr_lists_lock; /* 488 32 */ /* --- cacheline 8 boundary (512 bytes) was 8 bytes ago --- */ struct list_head fdbs; /* 520 16 */ struct list_head mdbs; /* 536 16 */ /* size: 552, cachelines: 9, members: 30 */ /* sum members: 539, holes: 3, sum holes: 12 */ /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */ /* last cacheline: 40 bytes */ }; Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 8878f9ce251b..a8f0037b58e2 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -261,18 +261,23 @@ struct dsa_port { u8 stp_state; + u8 vlan_filtering:1, + /* Managed by DSA on user ports and by + * drivers on CPU and DSA ports + */ + learning:1, + lag_tx_enabled:1, + devlink_port_setup:1, + setup:1; + struct device_node *dn; unsigned int ageing_time; - bool vlan_filtering; - /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ - bool learning; + struct dsa_bridge *bridge; struct devlink_port devlink_port; - bool devlink_port_setup; struct phylink *pl; struct phylink_config pl_config; struct net_device *lag_dev; - bool lag_tx_enabled; struct net_device *hsr_dev; struct list_head list; @@ -293,8 +298,6 @@ struct dsa_port { struct mutex addr_lists_lock; struct list_head fdbs; struct list_head mdbs; - - bool setup; }; /* TODO: ideally DSA ports would have a single dp->link_dp member, -- cgit v1.2.3 From 0625125877da9c1c09611a4acfd101571b7d1e16 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 5 Jan 2022 15:21:37 +0200 Subject: net: dsa: move dsa_port :: type near dsa_port :: index Both dsa_port :: type and dsa_port :: index introduce a 4 octet hole after them, so we can group them together and the holes would be eliminated, turning 16 octets of storage into just 8. This makes the cpu_dp pointer fit in the first cache line, which is good, because dsa_slave_to_master(), called by dsa_enqueue_skb(), uses it. Before: pahole -C dsa_port net/dsa/slave.o struct dsa_port { union { struct net_device * master; /* 0 8 */ struct net_device * slave; /* 0 8 */ }; /* 0 8 */ const struct dsa_device_ops * tag_ops; /* 8 8 */ struct dsa_switch_tree * dst; /* 16 8 */ struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU = 1, DSA_PORT_TYPE_DSA = 2, DSA_PORT_TYPE_USER = 3, } type; /* 32 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_switch * ds; /* 40 8 */ unsigned int index; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ const char * name; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dsa_port * cpu_dp; /* 64 8 */ u8 mac[6]; /* 72 6 */ u8 stp_state; /* 78 1 */ u8 vlan_filtering:1; /* 79: 0 1 */ u8 learning:1; /* 79: 1 1 */ u8 lag_tx_enabled:1; /* 79: 2 1 */ u8 devlink_port_setup:1; /* 79: 3 1 */ u8 setup:1; /* 79: 4 1 */ /* XXX 3 bits hole, try to pack */ struct device_node * dn; /* 80 8 */ unsigned int ageing_time; /* 88 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_bridge * bridge; /* 96 8 */ struct devlink_port devlink_port; /* 104 288 */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ struct phylink * pl; /* 392 8 */ struct phylink_config pl_config; /* 400 40 */ struct net_device * lag_dev; /* 440 8 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct net_device * hsr_dev; /* 448 8 */ struct list_head list; /* 456 16 */ const struct ethtool_ops * orig_ethtool_ops; /* 472 8 */ const struct dsa_netdevice_ops * netdev_ops; /* 480 8 */ struct mutex addr_lists_lock; /* 488 32 */ /* --- cacheline 8 boundary (512 bytes) was 8 bytes ago --- */ struct list_head fdbs; /* 520 16 */ struct list_head mdbs; /* 536 16 */ /* size: 552, cachelines: 9, members: 30 */ /* sum members: 539, holes: 3, sum holes: 12 */ /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */ /* last cacheline: 40 bytes */ }; After: pahole -C dsa_port net/dsa/slave.o struct dsa_port { union { struct net_device * master; /* 0 8 */ struct net_device * slave; /* 0 8 */ }; /* 0 8 */ const struct dsa_device_ops * tag_ops; /* 8 8 */ struct dsa_switch_tree * dst; /* 16 8 */ struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ struct dsa_switch * ds; /* 32 8 */ unsigned int index; /* 40 4 */ enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU = 1, DSA_PORT_TYPE_DSA = 2, DSA_PORT_TYPE_USER = 3, } type; /* 44 4 */ const char * name; /* 48 8 */ struct dsa_port * cpu_dp; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ u8 mac[6]; /* 64 6 */ u8 stp_state; /* 70 1 */ u8 vlan_filtering:1; /* 71: 0 1 */ u8 learning:1; /* 71: 1 1 */ u8 lag_tx_enabled:1; /* 71: 2 1 */ u8 devlink_port_setup:1; /* 71: 3 1 */ u8 setup:1; /* 71: 4 1 */ /* XXX 3 bits hole, try to pack */ struct device_node * dn; /* 72 8 */ unsigned int ageing_time; /* 80 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_bridge * bridge; /* 88 8 */ struct devlink_port devlink_port; /* 96 288 */ /* --- cacheline 6 boundary (384 bytes) --- */ struct phylink * pl; /* 384 8 */ struct phylink_config pl_config; /* 392 40 */ struct net_device * lag_dev; /* 432 8 */ struct net_device * hsr_dev; /* 440 8 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct list_head list; /* 448 16 */ const struct ethtool_ops * orig_ethtool_ops; /* 464 8 */ const struct dsa_netdevice_ops * netdev_ops; /* 472 8 */ struct mutex addr_lists_lock; /* 480 32 */ /* --- cacheline 8 boundary (512 bytes) --- */ struct list_head fdbs; /* 512 16 */ struct list_head mdbs; /* 528 16 */ /* size: 544, cachelines: 9, members: 30 */ /* sum members: 539, holes: 1, sum holes: 4 */ /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */ /* last cacheline: 32 bytes */ }; Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index a8f0037b58e2..5e42fa7ea377 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -246,6 +246,10 @@ struct dsa_port { struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); + struct dsa_switch *ds; + + unsigned int index; + enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU, @@ -253,8 +257,6 @@ struct dsa_port { DSA_PORT_TYPE_USER, } type; - struct dsa_switch *ds; - unsigned int index; const char *name; struct dsa_port *cpu_dp; u8 mac[ETH_ALEN]; -- cgit v1.2.3 From 7787ff7763989fe61db94a82d70d6fa80848d7c9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 5 Jan 2022 15:21:38 +0200 Subject: net: dsa: merge all bools of struct dsa_switch into a single u32 struct dsa_switch has 9 boolean properties, many of which are in fact set by drivers for custom behavior (vlan_filtering_is_global, needs_standalone_vlan_filtering, etc etc). The binary layout of the structure could be improved. For example, the "bool setup" at the beginning introduces a gratuitous 7 byte hole in the first cache line. The change merges all boolean properties into bitfields of an u32, and places that u32 in the first cache line of the structure, since many bools are accessed from the data path (untag_bridge_pvid, vlan_filtering, vlan_filtering_is_global). We place this u32 after the existing ds->index, which is also 4 bytes in size. As a positive side effect, ds->tagger_data now fits into the first cache line too, because 4 bytes are saved. Before: pahole -C dsa_switch net/dsa/slave.o struct dsa_switch { bool setup; /* 0 1 */ /* XXX 7 bytes hole, try to pack */ struct device * dev; /* 8 8 */ struct dsa_switch_tree * dst; /* 16 8 */ unsigned int index; /* 24 4 */ /* XXX 4 bytes hole, try to pack */ struct notifier_block nb; /* 32 24 */ /* XXX last struct has 4 bytes of padding */ void * priv; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ void * tagger_data; /* 64 8 */ struct dsa_chip_data * cd; /* 72 8 */ const struct dsa_switch_ops * ops; /* 80 8 */ u32 phys_mii_mask; /* 88 4 */ /* XXX 4 bytes hole, try to pack */ struct mii_bus * slave_mii_bus; /* 96 8 */ unsigned int ageing_time_min; /* 104 4 */ unsigned int ageing_time_max; /* 108 4 */ struct dsa_8021q_context * tag_8021q_ctx; /* 112 8 */ struct devlink * devlink; /* 120 8 */ /* --- cacheline 2 boundary (128 bytes) --- */ unsigned int num_tx_queues; /* 128 4 */ bool vlan_filtering_is_global; /* 132 1 */ bool needs_standalone_vlan_filtering; /* 133 1 */ bool configure_vlan_while_not_filtering; /* 134 1 */ bool untag_bridge_pvid; /* 135 1 */ bool assisted_learning_on_cpu_port; /* 136 1 */ bool vlan_filtering; /* 137 1 */ bool pcs_poll; /* 138 1 */ bool mtu_enforcement_ingress; /* 139 1 */ unsigned int num_lag_ids; /* 140 4 */ unsigned int max_num_bridges; /* 144 4 */ /* XXX 4 bytes hole, try to pack */ size_t num_ports; /* 152 8 */ /* size: 160, cachelines: 3, members: 27 */ /* sum members: 141, holes: 4, sum holes: 19 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 32 bytes */ }; After: pahole -C dsa_switch net/dsa/slave.o struct dsa_switch { struct device * dev; /* 0 8 */ struct dsa_switch_tree * dst; /* 8 8 */ unsigned int index; /* 16 4 */ u32 setup:1; /* 20: 0 4 */ u32 vlan_filtering_is_global:1; /* 20: 1 4 */ u32 needs_standalone_vlan_filtering:1; /* 20: 2 4 */ u32 configure_vlan_while_not_filtering:1; /* 20: 3 4 */ u32 untag_bridge_pvid:1; /* 20: 4 4 */ u32 assisted_learning_on_cpu_port:1; /* 20: 5 4 */ u32 vlan_filtering:1; /* 20: 6 4 */ u32 pcs_poll:1; /* 20: 7 4 */ u32 mtu_enforcement_ingress:1; /* 20: 8 4 */ /* XXX 23 bits hole, try to pack */ struct notifier_block nb; /* 24 24 */ /* XXX last struct has 4 bytes of padding */ void * priv; /* 48 8 */ void * tagger_data; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dsa_chip_data * cd; /* 64 8 */ const struct dsa_switch_ops * ops; /* 72 8 */ u32 phys_mii_mask; /* 80 4 */ /* XXX 4 bytes hole, try to pack */ struct mii_bus * slave_mii_bus; /* 88 8 */ unsigned int ageing_time_min; /* 96 4 */ unsigned int ageing_time_max; /* 100 4 */ struct dsa_8021q_context * tag_8021q_ctx; /* 104 8 */ struct devlink * devlink; /* 112 8 */ unsigned int num_tx_queues; /* 120 4 */ unsigned int num_lag_ids; /* 124 4 */ /* --- cacheline 2 boundary (128 bytes) --- */ unsigned int max_num_bridges; /* 128 4 */ /* XXX 4 bytes hole, try to pack */ size_t num_ports; /* 136 8 */ /* size: 144, cachelines: 3, members: 27 */ /* sum members: 132, holes: 2, sum holes: 8 */ /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 16 bytes */ }; Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 97 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 51 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 5e42fa7ea377..a8a586039033 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -321,8 +321,6 @@ struct dsa_mac_addr { }; struct dsa_switch { - bool setup; - struct device *dev; /* @@ -331,6 +329,57 @@ struct dsa_switch { struct dsa_switch_tree *dst; unsigned int index; + u32 setup:1, + /* Disallow bridge core from requesting + * different VLAN awareness settings on ports + * if not hardware-supported + */ + vlan_filtering_is_global:1, + /* Keep VLAN filtering enabled on ports not + * offloading any upper + */ + needs_standalone_vlan_filtering:1, + /* Pass .port_vlan_add and .port_vlan_del to + * drivers even for bridges that have + * vlan_filtering=0. All drivers should ideally + * set this (and then the option would get + * removed), but it is unknown whether this + * would break things or not. + */ + configure_vlan_while_not_filtering:1, + /* If the switch driver always programs the CPU + * port as egress tagged despite the VLAN + * configuration indicating otherwise, then + * setting @untag_bridge_pvid will force the + * DSA receive path to pop the bridge's + * default_pvid VLAN tagged frames to offer a + * consistent behavior between a + * vlan_filtering=0 and vlan_filtering=1 bridge + * device. + */ + untag_bridge_pvid:1, + /* Let DSA manage the FDB entries towards the + * CPU, based on the software bridge database. + */ + assisted_learning_on_cpu_port:1, + /* In case vlan_filtering_is_global is set, the + * VLAN awareness state should be retrieved + * from here and not from the per-port + * settings. + */ + vlan_filtering:1, + /* MAC PCS does not provide link state change + * interrupt, and requires polling. Flag passed + * on to PHYLINK. + */ + pcs_poll:1, + /* For switches that only have the MRU + * configurable. To ensure the configured MTU + * is not exceeded, normalization of MRU on all + * bridged interfaces is needed. + */ + mtu_enforcement_ingress:1; + /* Listener for switch fabric events */ struct notifier_block nb; @@ -371,50 +420,6 @@ struct dsa_switch { /* Number of switch port queues */ unsigned int num_tx_queues; - /* Disallow bridge core from requesting different VLAN awareness - * settings on ports if not hardware-supported - */ - bool vlan_filtering_is_global; - - /* Keep VLAN filtering enabled on ports not offloading any upper. */ - bool needs_standalone_vlan_filtering; - - /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges - * that have vlan_filtering=0. All drivers should ideally set this (and - * then the option would get removed), but it is unknown whether this - * would break things or not. - */ - bool configure_vlan_while_not_filtering; - - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the bridge's - * default_pvid VLAN tagged frames to offer a consistent behavior - * between a vlan_filtering=0 and vlan_filtering=1 bridge device. - */ - bool untag_bridge_pvid; - - /* Let DSA manage the FDB entries towards the CPU, based on the - * software bridge database. - */ - bool assisted_learning_on_cpu_port; - - /* In case vlan_filtering_is_global is set, the VLAN awareness state - * should be retrieved from here and not from the per-port settings. - */ - bool vlan_filtering; - - /* MAC PCS does not provide link state change interrupt, and requires - * polling. Flag passed on to PHYLINK. - */ - bool pcs_poll; - - /* For switches that only have the MRU configurable. To ensure the - * configured MTU is not exceeded, normalization of MRU on all bridged - * interfaces is needed. - */ - bool mtu_enforcement_ingress; - /* Drivers that benefit from having an ID associated with each * offloaded LAG should set this to the maximum number of * supported IDs. DSA will then maintain a mapping of _at -- cgit v1.2.3 From 258030acc93b459ca070924921f214bd05a2d56c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 5 Jan 2022 15:21:39 +0200 Subject: net: dsa: make dsa_switch :: num_ports an unsigned int Currently, num_ports is declared as size_t, which is defined as __kernel_ulong_t, therefore it occupies 8 bytes of memory. Even switches with port numbers in the range of tens are exotic, so there is no need for this amount of storage. Additionally, because the max_num_bridges member right above it is also 4 bytes, it means the compiler needs to add padding between the last 2 fields. By reducing the size, we don't need that padding and can reduce the struct size. Before: pahole -C dsa_switch net/dsa/slave.o struct dsa_switch { struct device * dev; /* 0 8 */ struct dsa_switch_tree * dst; /* 8 8 */ unsigned int index; /* 16 4 */ u32 setup:1; /* 20: 0 4 */ u32 vlan_filtering_is_global:1; /* 20: 1 4 */ u32 needs_standalone_vlan_filtering:1; /* 20: 2 4 */ u32 configure_vlan_while_not_filtering:1; /* 20: 3 4 */ u32 untag_bridge_pvid:1; /* 20: 4 4 */ u32 assisted_learning_on_cpu_port:1; /* 20: 5 4 */ u32 vlan_filtering:1; /* 20: 6 4 */ u32 pcs_poll:1; /* 20: 7 4 */ u32 mtu_enforcement_ingress:1; /* 20: 8 4 */ /* XXX 23 bits hole, try to pack */ struct notifier_block nb; /* 24 24 */ /* XXX last struct has 4 bytes of padding */ void * priv; /* 48 8 */ void * tagger_data; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dsa_chip_data * cd; /* 64 8 */ const struct dsa_switch_ops * ops; /* 72 8 */ u32 phys_mii_mask; /* 80 4 */ /* XXX 4 bytes hole, try to pack */ struct mii_bus * slave_mii_bus; /* 88 8 */ unsigned int ageing_time_min; /* 96 4 */ unsigned int ageing_time_max; /* 100 4 */ struct dsa_8021q_context * tag_8021q_ctx; /* 104 8 */ struct devlink * devlink; /* 112 8 */ unsigned int num_tx_queues; /* 120 4 */ unsigned int num_lag_ids; /* 124 4 */ /* --- cacheline 2 boundary (128 bytes) --- */ unsigned int max_num_bridges; /* 128 4 */ /* XXX 4 bytes hole, try to pack */ size_t num_ports; /* 136 8 */ /* size: 144, cachelines: 3, members: 27 */ /* sum members: 132, holes: 2, sum holes: 8 */ /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 16 bytes */ }; After: pahole -C dsa_switch net/dsa/slave.o struct dsa_switch { struct device * dev; /* 0 8 */ struct dsa_switch_tree * dst; /* 8 8 */ unsigned int index; /* 16 4 */ u32 setup:1; /* 20: 0 4 */ u32 vlan_filtering_is_global:1; /* 20: 1 4 */ u32 needs_standalone_vlan_filtering:1; /* 20: 2 4 */ u32 configure_vlan_while_not_filtering:1; /* 20: 3 4 */ u32 untag_bridge_pvid:1; /* 20: 4 4 */ u32 assisted_learning_on_cpu_port:1; /* 20: 5 4 */ u32 vlan_filtering:1; /* 20: 6 4 */ u32 pcs_poll:1; /* 20: 7 4 */ u32 mtu_enforcement_ingress:1; /* 20: 8 4 */ /* XXX 23 bits hole, try to pack */ struct notifier_block nb; /* 24 24 */ /* XXX last struct has 4 bytes of padding */ void * priv; /* 48 8 */ void * tagger_data; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dsa_chip_data * cd; /* 64 8 */ const struct dsa_switch_ops * ops; /* 72 8 */ u32 phys_mii_mask; /* 80 4 */ /* XXX 4 bytes hole, try to pack */ struct mii_bus * slave_mii_bus; /* 88 8 */ unsigned int ageing_time_min; /* 96 4 */ unsigned int ageing_time_max; /* 100 4 */ struct dsa_8021q_context * tag_8021q_ctx; /* 104 8 */ struct devlink * devlink; /* 112 8 */ unsigned int num_tx_queues; /* 120 4 */ unsigned int num_lag_ids; /* 124 4 */ /* --- cacheline 2 boundary (128 bytes) --- */ unsigned int max_num_bridges; /* 128 4 */ unsigned int num_ports; /* 132 4 */ /* size: 136, cachelines: 3, members: 27 */ /* sum members: 128, holes: 1, sum holes: 4 */ /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 8 bytes */ }; Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index a8a586039033..fef9d8bb5190 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -435,7 +435,7 @@ struct dsa_switch { */ unsigned int max_num_bridges; - size_t num_ports; + unsigned int num_ports; }; static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) -- cgit v1.2.3 From b035c88c6a303d39d511ef71ac4e2e0176756661 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 5 Jan 2022 15:21:40 +0200 Subject: net: dsa: move dsa_switch_tree :: ports and lags to first cache line dst->ports is accessed most notably by dsa_master_find_slave(), which is invoked in the RX path. dst->lags is accessed by dsa_lag_dev(), which is invoked in the RX path of tag_dsa.c. dst->tag_ops, dst->default_proto and dst->pd don't need to be in the first cache line, so they are moved out by this change. Before: pahole -C dsa_switch_tree net/dsa/slave.o struct dsa_switch_tree { struct list_head list; /* 0 16 */ struct raw_notifier_head nh; /* 16 8 */ unsigned int index; /* 24 4 */ struct kref refcount; /* 28 4 */ bool setup; /* 32 1 */ /* XXX 7 bytes hole, try to pack */ const struct dsa_device_ops * tag_ops; /* 40 8 */ enum dsa_tag_protocol default_proto; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_platform_data * pd; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct list_head ports; /* 64 16 */ struct list_head rtable; /* 80 16 */ struct net_device * * lags; /* 96 8 */ unsigned int lags_len; /* 104 4 */ unsigned int last_switch; /* 108 4 */ /* size: 112, cachelines: 2, members: 13 */ /* sum members: 101, holes: 2, sum holes: 11 */ /* last cacheline: 48 bytes */ }; After: pahole -C dsa_switch_tree net/dsa/slave.o struct dsa_switch_tree { struct list_head list; /* 0 16 */ struct list_head ports; /* 16 16 */ struct raw_notifier_head nh; /* 32 8 */ unsigned int index; /* 40 4 */ struct kref refcount; /* 44 4 */ struct net_device * * lags; /* 48 8 */ bool setup; /* 56 1 */ /* XXX 7 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ const struct dsa_device_ops * tag_ops; /* 64 8 */ enum dsa_tag_protocol default_proto; /* 72 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_platform_data * pd; /* 80 8 */ struct list_head rtable; /* 88 16 */ unsigned int lags_len; /* 104 4 */ unsigned int last_switch; /* 108 4 */ /* size: 112, cachelines: 2, members: 13 */ /* sum members: 101, holes: 2, sum holes: 11 */ /* last cacheline: 48 bytes */ }; Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index fef9d8bb5190..cbbac75138d9 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -119,6 +119,9 @@ struct dsa_netdevice_ops { struct dsa_switch_tree { struct list_head list; + /* List of switch ports */ + struct list_head ports; + /* Notifier chain for switch-wide events */ struct raw_notifier_head nh; @@ -128,6 +131,11 @@ struct dsa_switch_tree { /* Number of switches attached to this tree */ struct kref refcount; + /* Maps offloaded LAG netdevs to a zero-based linear ID for + * drivers that need it. + */ + struct net_device **lags; + /* Has this tree been applied to the hardware? */ bool setup; @@ -145,16 +153,10 @@ struct dsa_switch_tree { */ struct dsa_platform_data *pd; - /* List of switch ports */ - struct list_head ports; - /* List of DSA links composing the routing table */ struct list_head rtable; - /* Maps offloaded LAG netdevs to a zero-based linear ID for - * drivers that need it. - */ - struct net_device **lags; + /* Length of "lags" array */ unsigned int lags_len; /* Track the largest switch index within a tree */ -- cgit v1.2.3 From 4b026e82893b2e084fcc48bc26f91612b62ecc4f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 5 Jan 2022 15:21:41 +0200 Subject: net: dsa: combine two holes in struct dsa_switch_tree There is a 7 byte hole after dst->setup and a 4 byte hole after dst->default_proto. Combining them, we have a single hole of just 3 bytes on 64 bit machines. Before: pahole -C dsa_switch_tree net/dsa/slave.o struct dsa_switch_tree { struct list_head list; /* 0 16 */ struct list_head ports; /* 16 16 */ struct raw_notifier_head nh; /* 32 8 */ unsigned int index; /* 40 4 */ struct kref refcount; /* 44 4 */ struct net_device * * lags; /* 48 8 */ bool setup; /* 56 1 */ /* XXX 7 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ const struct dsa_device_ops * tag_ops; /* 64 8 */ enum dsa_tag_protocol default_proto; /* 72 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_platform_data * pd; /* 80 8 */ struct list_head rtable; /* 88 16 */ unsigned int lags_len; /* 104 4 */ unsigned int last_switch; /* 108 4 */ /* size: 112, cachelines: 2, members: 13 */ /* sum members: 101, holes: 2, sum holes: 11 */ /* last cacheline: 48 bytes */ }; After: pahole -C dsa_switch_tree net/dsa/slave.o struct dsa_switch_tree { struct list_head list; /* 0 16 */ struct list_head ports; /* 16 16 */ struct raw_notifier_head nh; /* 32 8 */ unsigned int index; /* 40 4 */ struct kref refcount; /* 44 4 */ struct net_device * * lags; /* 48 8 */ const struct dsa_device_ops * tag_ops; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ enum dsa_tag_protocol default_proto; /* 64 4 */ bool setup; /* 68 1 */ /* XXX 3 bytes hole, try to pack */ struct dsa_platform_data * pd; /* 72 8 */ struct list_head rtable; /* 80 16 */ unsigned int lags_len; /* 96 4 */ unsigned int last_switch; /* 100 4 */ /* size: 104, cachelines: 2, members: 13 */ /* sum members: 101, holes: 1, sum holes: 3 */ /* last cacheline: 40 bytes */ }; Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index cbbac75138d9..5d0fec6db3ae 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -136,9 +136,6 @@ struct dsa_switch_tree { */ struct net_device **lags; - /* Has this tree been applied to the hardware? */ - bool setup; - /* Tagging protocol operations */ const struct dsa_device_ops *tag_ops; @@ -147,6 +144,9 @@ struct dsa_switch_tree { */ enum dsa_tag_protocol default_proto; + /* Has this tree been applied to the hardware? */ + bool setup; + /* * Configuration data for the platform device that owns * this dsa switch tree instance. -- cgit v1.2.3 From 01ec4a2e8f01f027a0f06cad237c935da8d643bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 3 Jul 2021 00:23:39 +0200 Subject: headers/deps: USB: Optimize dependencies, remove The header is used over 1,400 times in a typical distro build, but few of its users actually need the full header. -------------------------------------------------------------------- | Combined, preprocessed C code size of header, without line markers, | with comments stripped: ------------------------- before: | #include | LOC: 7,078 | headers: 172 after: | #include | LOC: 812 | headers: 38 Remove it and add it to the places that need it. Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ch9.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 1cffa34740b0..969e7dba6358 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -33,7 +33,6 @@ #ifndef __LINUX_USB_CH9_H #define __LINUX_USB_CH9_H -#include #include /* USB 3.2 SuperSpeed Plus phy signaling rate generation and lane count */ @@ -45,6 +44,8 @@ enum usb_ssp_rate { USB_SSP_GEN_2x2, }; +struct device; + extern const char *usb_ep_type_string(int ep_type); extern const char *usb_speed_string(enum usb_device_speed speed); extern enum usb_device_speed usb_get_maximum_speed(struct device *dev); -- cgit v1.2.3 From d82e2b27ad3a4fdd745332e0c310ae05660a1bf1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 5 Jan 2022 10:04:56 +0200 Subject: RDMA/mad: Delete duplicated init_query_mad functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several drivers used same function to initialize query MAD, so move that function to global header file. Link: https://lore.kernel.org/r/af6f35c590ff5ef56d0137351b8b295af0f7c13c.1641369858.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Reviewed-by: Håkon Bugge Signed-off-by: Jason Gunthorpe --- include/rdma/ib_smi.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index fdb8633cbaff..fc16b826b2c1 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -144,5 +144,15 @@ ib_get_smp_direction(struct ib_smp *smp) #define IB_NOTICE_TRAP_DR_NOTICE 0x80 #define IB_NOTICE_TRAP_DR_TRUNC 0x40 - +/** + * ib_init_query_mad - Initialize query MAD. + * @mad: MAD to initialize. + */ +static inline void ib_init_query_mad(struct ib_smp *mad) +{ + mad->base_version = IB_MGMT_BASE_VERSION; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; +} #endif /* IB_SMI_H */ -- cgit v1.2.3 From edce22e19bfa86efa2522d041d6367f2f099e8ed Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 5 Jan 2022 09:05:15 -0800 Subject: block: move rq_list macros to blk-mq.h Move the request list macros to the header file that defines that struct they operate on. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220105170518.3181469-2-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 29 +++++++++++++++++++++++++++++ include/linux/blkdev.h | 29 ----------------------------- 2 files changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 550996cf419c..bf64b94cd64e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -216,6 +216,35 @@ static inline unsigned short req_get_ioprio(struct request *req) #define rq_dma_dir(rq) \ (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) +#define rq_list_add(listptr, rq) do { \ + (rq)->rq_next = *(listptr); \ + *(listptr) = rq; \ +} while (0) + +#define rq_list_pop(listptr) \ +({ \ + struct request *__req = NULL; \ + if ((listptr) && *(listptr)) { \ + __req = *(listptr); \ + *(listptr) = __req->rq_next; \ + } \ + __req; \ +}) + +#define rq_list_peek(listptr) \ +({ \ + struct request *__req = NULL; \ + if ((listptr) && *(listptr)) \ + __req = *(listptr); \ + __req; \ +}) + +#define rq_list_for_each(listptr, pos) \ + for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) + +#define rq_list_next(rq) (rq)->rq_next +#define rq_list_empty(list) ((list) == (struct request *) NULL) + enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ BLK_EH_RESET_TIMER, /* reset timer and try again */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22746b2d6825..9c95df26fc26 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1339,33 +1339,4 @@ struct io_comp_batch { #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } -#define rq_list_add(listptr, rq) do { \ - (rq)->rq_next = *(listptr); \ - *(listptr) = rq; \ -} while (0) - -#define rq_list_pop(listptr) \ -({ \ - struct request *__req = NULL; \ - if ((listptr) && *(listptr)) { \ - __req = *(listptr); \ - *(listptr) = __req->rq_next; \ - } \ - __req; \ -}) - -#define rq_list_peek(listptr) \ -({ \ - struct request *__req = NULL; \ - if ((listptr) && *(listptr)) \ - __req = *(listptr); \ - __req; \ -}) - -#define rq_list_for_each(listptr, pos) \ - for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) - -#define rq_list_next(rq) (rq)->rq_next -#define rq_list_empty(list) ((list) == (struct request *) NULL) - #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 3764fd05e1f89530e2ee5cbff0b638f2b1141b90 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 5 Jan 2022 09:05:16 -0800 Subject: block: introduce rq_list_for_each_safe macro While iterating a list, a particular request may need to be removed for special handling. Provide an iterator that can safely handle that. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20220105170518.3181469-3-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index bf64b94cd64e..1467f0fa2142 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -242,6 +242,10 @@ static inline unsigned short req_get_ioprio(struct request *req) #define rq_list_for_each(listptr, pos) \ for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) +#define rq_list_for_each_safe(listptr, pos, nxt) \ + for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos); \ + pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL) + #define rq_list_next(rq) (rq)->rq_next #define rq_list_empty(list) ((list) == (struct request *) NULL) -- cgit v1.2.3 From d2528be7a8b09af9796a270debd14101a72bb552 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 5 Jan 2022 09:05:17 -0800 Subject: block: introduce rq_list_move When iterating a list, a particular request may need to be moved for special handling. Provide a helper function to achieve that so drivers don't need to reimplement rqlist manipulation. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220105170518.3181469-4-kbusch@kernel.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1467f0fa2142..f40a05ecca4a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -249,6 +249,23 @@ static inline unsigned short req_get_ioprio(struct request *req) #define rq_list_next(rq) (rq)->rq_next #define rq_list_empty(list) ((list) == (struct request *) NULL) +/** + * rq_list_move() - move a struct request from one list to another + * @src: The source list @rq is currently in + * @dst: The destination list that @rq will be appended to + * @rq: The request to move + * @prev: The request preceding @rq in @src (NULL if @rq is the head) + */ +static void inline rq_list_move(struct request **src, struct request **dst, + struct request *rq, struct request *prev) +{ + if (prev) + prev->rq_next = rq->rq_next; + else + *src = rq->rq_next; + rq_list_add(dst, rq); +} + enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ BLK_EH_RESET_TIMER, /* reset timer and try again */ -- cgit v1.2.3 From fbdb0ba7051e9b0881708c1c7bb491363cb7e486 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 15 Dec 2021 09:54:31 +0200 Subject: IB/mlx5: Expose NDR speed through MAD Under MAD query port, Report NDR speed when NDR is supported in the port capability mask. Link: https://lore.kernel.org/r/a2ab630d2a634547db9b581faa9d65da2edb9d05.1639554831.git.leonro@nvidia.com Signed-off-by: Maher Sanalla Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_mad.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 465b0d0bdaf8..2e3843b761e8 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -276,6 +276,7 @@ enum ib_port_capability_mask2_bits { IB_PORT_SWITCH_PORT_STATE_TABLE_SUP = 1 << 3, IB_PORT_LINK_WIDTH_2X_SUP = 1 << 4, IB_PORT_LINK_SPEED_HDR_SUP = 1 << 5, + IB_PORT_LINK_SPEED_NDR_SUP = 1 << 10, }; #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26) -- cgit v1.2.3 From ffa81a03267b450cb8c7bc0d327c05c99de579a4 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Mon, 25 Oct 2021 12:10:36 +0900 Subject: dt-bindings: clock: Add DT bindings for SMU of Toshiba Visconti TMPV770x SoC Add device tree bindings for SMU (System Management Unit) controller of Toshiba Visconti TMPV770x SoC series. Signed-off-by: Nobuhiro Iwamatsu Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211025031038.4180686-3-nobuhiro1.iwamatsu@toshiba.co.jp Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/toshiba,tmpv770x.h | 181 +++++++++++++++++++++++++++ include/dt-bindings/reset/toshiba,tmpv770x.h | 41 ++++++ 2 files changed, 222 insertions(+) create mode 100644 include/dt-bindings/clock/toshiba,tmpv770x.h create mode 100644 include/dt-bindings/reset/toshiba,tmpv770x.h (limited to 'include') diff --git a/include/dt-bindings/clock/toshiba,tmpv770x.h b/include/dt-bindings/clock/toshiba,tmpv770x.h new file mode 100644 index 000000000000..5fce713001fd --- /dev/null +++ b/include/dt-bindings/clock/toshiba,tmpv770x.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLOCK_TOSHIBA_TMPV770X_H_ +#define _DT_BINDINGS_CLOCK_TOSHIBA_TMPV770X_H_ + +/* PLL */ +#define TMPV770X_PLL_PIPLL0 0 +#define TMPV770X_PLL_PIPLL1 1 +#define TMPV770X_PLL_PIDNNPLL 2 +#define TMPV770X_PLL_PIETHERPLL 3 +#define TMPV770X_PLL_PIDDRCPLL 4 +#define TMPV770X_PLL_PIVOIFPLL 5 +#define TMPV770X_PLL_PIIMGERPLL 6 +#define TMPV770X_NR_PLL 7 + +/* Clocks */ +#define TMPV770X_CLK_PIPLL1_DIV1 0 +#define TMPV770X_CLK_PIPLL1_DIV2 1 +#define TMPV770X_CLK_PIPLL1_DIV4 2 +#define TMPV770X_CLK_PIDNNPLL_DIV1 3 +#define TMPV770X_CLK_DDRC_PHY_PLL0 4 +#define TMPV770X_CLK_DDRC_PHY_PLL1 5 +#define TMPV770X_CLK_D_PHYPLL 6 +#define TMPV770X_CLK_PHY_PCIEPLL 7 +#define TMPV770X_CLK_CA53CL0 8 +#define TMPV770X_CLK_CA53CL1 9 +#define TMPV770X_CLK_PISDMAC 10 +#define TMPV770X_CLK_PIPDMAC0 11 +#define TMPV770X_CLK_PIPDMAC1 12 +#define TMPV770X_CLK_PIWRAM 13 +#define TMPV770X_CLK_DDRC0 14 +#define TMPV770X_CLK_DDRC0_SCLK 15 +#define TMPV770X_CLK_DDRC0_NCLK 16 +#define TMPV770X_CLK_DDRC0_MCLK 17 +#define TMPV770X_CLK_DDRC0_APBCLK 18 +#define TMPV770X_CLK_DDRC1 19 +#define TMPV770X_CLK_DDRC1_SCLK 20 +#define TMPV770X_CLK_DDRC1_NCLK 21 +#define TMPV770X_CLK_DDRC1_MCLK 22 +#define TMPV770X_CLK_DDRC1_APBCLK 23 +#define TMPV770X_CLK_HOX 24 +#define TMPV770X_CLK_PCIE_MSTR 25 +#define TMPV770X_CLK_PCIE_AUX 26 +#define TMPV770X_CLK_PIINTC 27 +#define TMPV770X_CLK_PIETHER_BUS 28 +#define TMPV770X_CLK_PISPI0 29 +#define TMPV770X_CLK_PISPI1 30 +#define TMPV770X_CLK_PISPI2 31 +#define TMPV770X_CLK_PISPI3 32 +#define TMPV770X_CLK_PISPI4 33 +#define TMPV770X_CLK_PISPI5 34 +#define TMPV770X_CLK_PISPI6 35 +#define TMPV770X_CLK_PIUART0 36 +#define TMPV770X_CLK_PIUART1 37 +#define TMPV770X_CLK_PIUART2 38 +#define TMPV770X_CLK_PIUART3 39 +#define TMPV770X_CLK_PII2C0 40 +#define TMPV770X_CLK_PII2C1 41 +#define TMPV770X_CLK_PII2C2 42 +#define TMPV770X_CLK_PII2C3 43 +#define TMPV770X_CLK_PII2C4 44 +#define TMPV770X_CLK_PII2C5 45 +#define TMPV770X_CLK_PII2C6 46 +#define TMPV770X_CLK_PII2C7 47 +#define TMPV770X_CLK_PII2C8 48 +#define TMPV770X_CLK_PIGPIO 49 +#define TMPV770X_CLK_PIPGM 50 +#define TMPV770X_CLK_PIPCMIF 51 +#define TMPV770X_CLK_PIPCMIF_AUDIO_O 52 +#define TMPV770X_CLK_PIPCMIF_AUDIO_I 53 +#define TMPV770X_CLK_PICMPT0 54 +#define TMPV770X_CLK_PICMPT1 55 +#define TMPV770X_CLK_PITSC 56 +#define TMPV770X_CLK_PIUWDT 57 +#define TMPV770X_CLK_PISWDT 58 +#define TMPV770X_CLK_WDTCLK 59 +#define TMPV770X_CLK_PISUBUS_150M 60 +#define TMPV770X_CLK_PISUBUS_300M 61 +#define TMPV770X_CLK_PIPMU 62 +#define TMPV770X_CLK_PIGPMU 63 +#define TMPV770X_CLK_PITMU 64 +#define TMPV770X_CLK_WRCK 65 +#define TMPV770X_CLK_PIEMM 66 +#define TMPV770X_CLK_PIMISC 67 +#define TMPV770X_CLK_PIGCOMM 68 +#define TMPV770X_CLK_PIDCOMM 69 +#define TMPV770X_CLK_PICKMON 70 +#define TMPV770X_CLK_PIMBUS 71 +#define TMPV770X_CLK_SBUSCLK 72 +#define TMPV770X_CLK_DDR0_APBCLKCLK 73 +#define TMPV770X_CLK_DDR1_APBCLKCLK 74 +#define TMPV770X_CLK_DSP0_PBCLK 75 +#define TMPV770X_CLK_DSP1_PBCLK 76 +#define TMPV770X_CLK_DSP2_PBCLK 77 +#define TMPV770X_CLK_DSP3_PBCLK 78 +#define TMPV770X_CLK_DSVIIF0_APBCLK 79 +#define TMPV770X_CLK_VIIF0_APBCLK 80 +#define TMPV770X_CLK_VIIF0_CFGCLK 81 +#define TMPV770X_CLK_VIIF1_APBCLK 82 +#define TMPV770X_CLK_VIIF1_CFGCLK 83 +#define TMPV770X_CLK_VIIF2_APBCLK 84 +#define TMPV770X_CLK_VIIF2_CFGCLK 85 +#define TMPV770X_CLK_VIIF3_APBCLK 86 +#define TMPV770X_CLK_VIIF3_CFGCLK 87 +#define TMPV770X_CLK_VIIF4_APBCLK 88 +#define TMPV770X_CLK_VIIF4_CFGCLK 89 +#define TMPV770X_CLK_VIIF5_APBCLK 90 +#define TMPV770X_CLK_VIIF5_CFGCLK 91 +#define TMPV770X_CLK_VOIF_SBUSCLK 92 +#define TMPV770X_CLK_VOIF_PROCCLK 93 +#define TMPV770X_CLK_VOIF_DPHYCFGCLK 94 +#define TMPV770X_CLK_DNN0 95 +#define TMPV770X_CLK_STMAT 96 +#define TMPV770X_CLK_HWA0 97 +#define TMPV770X_CLK_AFFINE0 98 +#define TMPV770X_CLK_HAMAT 99 +#define TMPV770X_CLK_SMLDB 100 +#define TMPV770X_CLK_HWA0_ASYNC 101 +#define TMPV770X_CLK_HWA2 102 +#define TMPV770X_CLK_FLMAT 103 +#define TMPV770X_CLK_PYRAMID 104 +#define TMPV770X_CLK_HWA2_ASYNC 105 +#define TMPV770X_CLK_DSP0 106 +#define TMPV770X_CLK_VIIFBS0 107 +#define TMPV770X_CLK_VIIFBS0_L2ISP 108 +#define TMPV770X_CLK_VIIFBS0_L1ISP 109 +#define TMPV770X_CLK_VIIFBS0_PROC 110 +#define TMPV770X_CLK_VIIFBS1 111 +#define TMPV770X_CLK_VIIFBS2 112 +#define TMPV770X_CLK_VIIFOP_MBUS 113 +#define TMPV770X_CLK_VIIFOP0_PROC 114 +#define TMPV770X_CLK_PIETHER_2P5M 115 +#define TMPV770X_CLK_PIETHER_25M 116 +#define TMPV770X_CLK_PIETHER_50M 117 +#define TMPV770X_CLK_PIETHER_125M 118 +#define TMPV770X_CLK_VOIF0_DPHYCFG 119 +#define TMPV770X_CLK_VOIF0_PROC 120 +#define TMPV770X_CLK_VOIF0_SBUS 121 +#define TMPV770X_CLK_VOIF0_DSIREF 122 +#define TMPV770X_CLK_VOIF0_PIXEL 123 +#define TMPV770X_CLK_PIREFCLK 124 +#define TMPV770X_CLK_SBUS 125 +#define TMPV770X_CLK_BUSLCK 126 +#define TMPV770X_NR_CLK 127 + +/* Reset */ +#define TMPV770X_RESET_PIETHER_2P5M 0 +#define TMPV770X_RESET_PIETHER_25M 1 +#define TMPV770X_RESET_PIETHER_50M 2 +#define TMPV770X_RESET_PIETHER_125M 3 +#define TMPV770X_RESET_HOX 4 +#define TMPV770X_RESET_PCIE_MSTR 5 +#define TMPV770X_RESET_PCIE_AUX 6 +#define TMPV770X_RESET_PIINTC 7 +#define TMPV770X_RESET_PIETHER_BUS 8 +#define TMPV770X_RESET_PISPI0 9 +#define TMPV770X_RESET_PISPI1 10 +#define TMPV770X_RESET_PISPI2 11 +#define TMPV770X_RESET_PISPI3 12 +#define TMPV770X_RESET_PISPI4 13 +#define TMPV770X_RESET_PISPI5 14 +#define TMPV770X_RESET_PISPI6 15 +#define TMPV770X_RESET_PIUART0 16 +#define TMPV770X_RESET_PIUART1 17 +#define TMPV770X_RESET_PIUART2 18 +#define TMPV770X_RESET_PIUART3 19 +#define TMPV770X_RESET_PII2C0 20 +#define TMPV770X_RESET_PII2C1 21 +#define TMPV770X_RESET_PII2C2 22 +#define TMPV770X_RESET_PII2C3 23 +#define TMPV770X_RESET_PII2C4 24 +#define TMPV770X_RESET_PII2C5 25 +#define TMPV770X_RESET_PII2C6 26 +#define TMPV770X_RESET_PII2C7 27 +#define TMPV770X_RESET_PII2C8 28 +#define TMPV770X_RESET_PIPCMIF 29 +#define TMPV770X_RESET_PICKMON 30 +#define TMPV770X_RESET_SBUSCLK 31 +#define TMPV770X_NR_RESET 32 + +#endif /*_DT_BINDINGS_CLOCK_TOSHIBA_TMPV770X_H_ */ diff --git a/include/dt-bindings/reset/toshiba,tmpv770x.h b/include/dt-bindings/reset/toshiba,tmpv770x.h new file mode 100644 index 000000000000..c1007acb1941 --- /dev/null +++ b/include/dt-bindings/reset/toshiba,tmpv770x.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_RESET_TOSHIBA_TMPV770X_H_ +#define _DT_BINDINGS_RESET_TOSHIBA_TMPV770X_H_ + +/* Reset */ +#define TMPV770X_RESET_PIETHER_2P5M 0 +#define TMPV770X_RESET_PIETHER_25M 1 +#define TMPV770X_RESET_PIETHER_50M 2 +#define TMPV770X_RESET_PIETHER_125M 3 +#define TMPV770X_RESET_HOX 4 +#define TMPV770X_RESET_PCIE_MSTR 5 +#define TMPV770X_RESET_PCIE_AUX 6 +#define TMPV770X_RESET_PIINTC 7 +#define TMPV770X_RESET_PIETHER_BUS 8 +#define TMPV770X_RESET_PISPI0 9 +#define TMPV770X_RESET_PISPI1 10 +#define TMPV770X_RESET_PISPI2 11 +#define TMPV770X_RESET_PISPI3 12 +#define TMPV770X_RESET_PISPI4 13 +#define TMPV770X_RESET_PISPI5 14 +#define TMPV770X_RESET_PISPI6 15 +#define TMPV770X_RESET_PIUART0 16 +#define TMPV770X_RESET_PIUART1 17 +#define TMPV770X_RESET_PIUART2 18 +#define TMPV770X_RESET_PIUART3 19 +#define TMPV770X_RESET_PII2C0 20 +#define TMPV770X_RESET_PII2C1 21 +#define TMPV770X_RESET_PII2C2 22 +#define TMPV770X_RESET_PII2C3 23 +#define TMPV770X_RESET_PII2C4 24 +#define TMPV770X_RESET_PII2C5 25 +#define TMPV770X_RESET_PII2C6 26 +#define TMPV770X_RESET_PII2C7 27 +#define TMPV770X_RESET_PII2C8 28 +#define TMPV770X_RESET_PIPCMIF 29 +#define TMPV770X_RESET_PICKMON 30 +#define TMPV770X_RESET_SBUSCLK 31 +#define TMPV770X_NR_RESET 32 + +#endif /*_DT_BINDINGS_RESET_TOSHIBA_TMPV770X_H_ */ -- cgit v1.2.3 From 4a48ef70b93b8c7ed5190adfca18849e76387b80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 3 Jan 2022 16:08:06 +0100 Subject: xdp: Allow registering memory model without rxq reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions that register an XDP memory model take a struct xdp_rxq as parameter, but the RXQ is not actually used for anything other than pulling out the struct xdp_mem_info that it embeds. So refactor the register functions and export variants that just take a pointer to the xdp_mem_info. This is in preparation for enabling XDP_REDIRECT in bpf_prog_run(), using a page_pool instance that is not connected to any network device. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220103150812.87914-2-toke@redhat.com --- include/net/xdp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 447f9b1578f3..8f0812e4996d 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, enum xdp_mem_type type, void *allocator); void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); +int xdp_reg_mem_model(struct xdp_mem_info *mem, + enum xdp_mem_type type, void *allocator); +void xdp_unreg_mem_model(struct xdp_mem_info *mem); /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. -- cgit v1.2.3 From 35b2e549894b7ef0b6e7f3a70c2ab75b767cfce9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 3 Jan 2022 16:08:07 +0100 Subject: page_pool: Add callback to init pages when they are allocated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new callback function to page_pool that, if set, will be called every time a new page is allocated. This will be used from bpf_test_run() to initialise the page data with the data provided by userspace when running XDP programs with redirect turned on. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20220103150812.87914-3-toke@redhat.com --- include/net/page_pool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/page_pool.h b/include/net/page_pool.h index a4082406a003..d807b6800a4a 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -80,6 +80,8 @@ struct page_pool_params { enum dma_data_direction dma_dir; /* DMA mapping direction */ unsigned int max_len; /* max DMA sync memory size */ unsigned int offset; /* DMA addr offset */ + void (*init_callback)(struct page *page, void *arg); + void *init_arg; }; struct page_pool { -- cgit v1.2.3 From 64693ec7774e471f817a725686d93903e919a2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 3 Jan 2022 16:08:08 +0100 Subject: page_pool: Store the XDP mem id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Store the XDP mem ID inside the page_pool struct so it can be retrieved later for use in bpf_prog_run(). Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20220103150812.87914-4-toke@redhat.com --- include/net/page_pool.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/page_pool.h b/include/net/page_pool.h index d807b6800a4a..79a805542d0f 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -96,6 +96,7 @@ struct page_pool { unsigned int frag_offset; struct page *frag_page; long frag_users; + u32 xdp_mem_id; /* * Data structure for allocation side @@ -170,9 +171,12 @@ bool page_pool_return_skb_page(struct page *page); struct page_pool *page_pool_create(const struct page_pool_params *params); +struct xdp_mem_info; + #ifdef CONFIG_PAGE_POOL void page_pool_destroy(struct page_pool *pool); -void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), + struct xdp_mem_info *mem); void page_pool_release_page(struct page_pool *pool, struct page *page); void page_pool_put_page_bulk(struct page_pool *pool, void **data, int count); @@ -182,7 +186,8 @@ static inline void page_pool_destroy(struct page_pool *pool) } static inline void page_pool_use_xdp_mem(struct page_pool *pool, - void (*disconnect)(void *)) + void (*disconnect)(void *), + struct xdp_mem_info *mem) { } static inline void page_pool_release_page(struct page_pool *pool, -- cgit v1.2.3 From d53ad5d8b218a885e95080d4d3d556b16b91b1b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 3 Jan 2022 16:08:09 +0100 Subject: xdp: Move conversion to xdp_frame out of map functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All map redirect functions except XSK maps convert xdp_buff to xdp_frame before enqueueing it. So move this conversion of out the map functions and into xdp_do_redirect(). This removes a bit of duplicated code, but more importantly it makes it possible to support caller-allocated xdp_frame structures, which will be added in a subsequent commit. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220103150812.87914-5-toke@redhat.com --- include/linux/bpf.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 26753139d5b4..6e947cd91152 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1669,17 +1669,17 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); struct btf *bpf_get_btf_vmlinux(void); /* Map specifics */ -struct xdp_buff; +struct xdp_frame; struct sk_buff; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; void __dev_flush(void); -int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, +int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx); -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx); -int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, +int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); @@ -1688,7 +1688,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, bool exclude_ingress); void __cpu_map_flush(void); -int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, +int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx); int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, struct sk_buff *skb); @@ -1866,26 +1866,26 @@ static inline void __dev_flush(void) { } -struct xdp_buff; +struct xdp_frame; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; static inline -int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, +int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; } static inline -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; } static inline -int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, +int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { return 0; @@ -1913,7 +1913,7 @@ static inline void __cpu_map_flush(void) } static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, - struct xdp_buff *xdp, + struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; -- cgit v1.2.3 From 1372d34ccf6dd480332b2bcb2fd59a2b9a0df415 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 3 Jan 2022 16:08:10 +0100 Subject: xdp: Add xdp_do_redirect_frame() for pre-computed xdp_frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an xdp_do_redirect_frame() variant which supports pre-computed xdp_frame structures. This will be used in bpf_prog_run() to avoid having to write to the xdp_frame structure when the XDP program doesn't modify the frame boundaries. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220103150812.87914-6-toke@redhat.com --- include/linux/filter.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 60eec80fa1d4..71fa57b88bfc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1019,6 +1019,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); +int xdp_do_redirect_frame(struct net_device *dev, + struct xdp_buff *xdp, + struct xdp_frame *xdpf, + struct bpf_prog *prog); void xdp_do_flush(void); /* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as -- cgit v1.2.3 From 335e4dd67b480c8fa571ea7e71af0d22047fcfb7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 4 Jan 2022 09:46:06 +0100 Subject: xen/x86: obtain upper 32 bits of video frame buffer address for Dom0 The hypervisor has been supplying this information for a couple of major releases. Make use of it. The need to set a flag in the capabilities field also points out that the prior setting of that field from the hypervisor interface's gbl_caps one was wrong, so that code gets deleted (there's also no equivalent of this in native boot code). Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/a3df8bf3-d044-b7bb-3383-cd5239d6d4af@suse.com Signed-off-by: Juergen Gross --- include/xen/interface/xen.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 5e9916939268..0ca23eca2a9c 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -722,6 +722,9 @@ struct dom0_vga_console_info { uint32_t gbl_caps; /* Mode attributes (offset 0x0, VESA command 0x4f01). */ uint16_t mode_attrs; + uint16_t pad; + /* high 32 bits of lfb_base */ + uint32_t ext_lfb_base; } vesa_lfb; } u; }; -- cgit v1.2.3 From 9dd060afe2dfd4e3f67b6732fdc681e52cd7cbd9 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 9 Dec 2021 22:05:33 +0200 Subject: xen/balloon: Bring alloc(free)_xenballooned_pages helpers back This patch rolls back some of the changes introduced by commit 121f2faca2c0a "xen/balloon: rename alloc/free_xenballooned_pages" in order to make possible to still allocate xenballooned pages if CONFIG_XEN_UNPOPULATED_ALLOC is enabled. On Arm the unpopulated pages will be allocated on top of extended regions provided by Xen via device-tree (the subsequent patches will add required bits to support unpopulated-alloc feature on Arm). The problem is that extended regions feature has been introduced into Xen quite recently (during 4.16 release cycle). So this effectively means that Linux must only use unpopulated-alloc on Arm if it is running on "new Xen" which advertises these regions. But, it will only be known after parsing the "hypervisor" node at boot time, so before doing that we cannot assume anything. In order to keep working if CONFIG_XEN_UNPOPULATED_ALLOC is enabled and the extended regions are not advertised (Linux is running on "old Xen", etc) we need the fallback to alloc_xenballooned_pages(). This way we wouldn't reduce the amount of memory usable (wasting RAM pages) for any of the external mappings anymore (and eliminate XSA-300) with "new Xen", but would be still functional ballooning out RAM pages with "old Xen". Also rename alloc(free)_xenballooned_pages to xen_alloc(free)_ballooned_pages and make xen_alloc(free)_unpopulated_pages static inline in xen.h if CONFIG_XEN_UNPOPULATED_ALLOC is disabled. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/1639080336-26573-4-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- include/xen/balloon.h | 3 +++ include/xen/xen.h | 14 ++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/xen/balloon.h b/include/xen/balloon.h index e93d4f0088c5..f78a6cc94f1a 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -26,6 +26,9 @@ extern struct balloon_stats balloon_stats; void balloon_set_new_target(unsigned long target); +int xen_alloc_ballooned_pages(unsigned int nr_pages, struct page **pages); +void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages); + #ifdef CONFIG_XEN_BALLOON void xen_balloon_init(void); #else diff --git a/include/xen/xen.h b/include/xen/xen.h index 9f031b5faa54..86c5b37684d9 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -52,7 +52,21 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, extern u64 xen_saved_max_mem_size; #endif +#ifdef CONFIG_XEN_UNPOPULATED_ALLOC int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages); +#else +#include +static inline int xen_alloc_unpopulated_pages(unsigned int nr_pages, + struct page **pages) +{ + return xen_alloc_ballooned_pages(nr_pages, pages); +} +static inline void xen_free_unpopulated_pages(unsigned int nr_pages, + struct page **pages) +{ + xen_free_ballooned_pages(nr_pages, pages); +} +#endif #endif /* _XEN_XEN_H */ -- cgit v1.2.3 From d1a928eac72962b562162c25baf45ce147e27247 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 9 Dec 2021 22:05:34 +0200 Subject: xen/unpopulated-alloc: Add mechanism to use Xen resource The main reason of this change is that unpopulated-alloc code cannot be used in its current form on Arm, but there is a desire to reuse it to avoid wasting real RAM pages for the grant/foreign mappings. The problem is that system "iomem_resource" is used for the address space allocation, but the really unallocated space can't be figured out precisely by the domain on Arm without hypervisor involvement. For example, not all device I/O regions are known by the time domain starts creating grant/foreign mappings. And following the advise from "iomem_resource" we might end up reusing these regions by a mistake. So, the hypervisor which maintains the P2M for the domain is in the best position to provide unused regions of guest physical address space which could be safely used to create grant/foreign mappings. Introduce new helper arch_xen_unpopulated_init() which purpose is to create specific Xen resource based on the memory regions provided by the hypervisor to be used as unused space for Xen scratch pages. If arch doesn't define arch_xen_unpopulated_init() the default "iomem_resource" will be used. Update the arguments list of allocate_resource() in fill_list() to always allocate a region from the hotpluggable range (maximum possible addressable physical memory range for which the linear mapping could be created). If arch doesn't define arch_get_mappable_range() the default range (0,-1) will be used. The behaviour on x86 won't be changed by current patch as both arch_xen_unpopulated_init() and arch_get_mappable_range() are not implemented for it. Also fallback to allocate xenballooned pages (balloon out RAM pages) if we do not have any suitable resource to work with (target_resource is invalid) and as the result we won't be able to provide unpopulated pages on a request. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/1639080336-26573-5-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- include/xen/xen.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/xen/xen.h b/include/xen/xen.h index 86c5b37684d9..a99bab817523 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -55,6 +55,8 @@ extern u64 xen_saved_max_mem_size; #ifdef CONFIG_XEN_UNPOPULATED_ALLOC int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages); +#include +int arch_xen_unpopulated_init(struct resource **res); #else #include static inline int xen_alloc_unpopulated_pages(unsigned int nr_pages, -- cgit v1.2.3 From a457fd5660efa9cf960d2461156a1025bfdb13fa Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 26 Nov 2021 17:05:51 +0530 Subject: RISC-V: KVM: Add VM capability to allow userspace get GPA bits The number of GPA bits supported for a RISC-V Guest/VM is based on the MMU mode used by the G-stage translation. The KVM RISC-V will detect and use the best possible MMU mode for the G-stage in kvm_arch_init(). We add a generic VM capability KVM_CAP_VM_GPA_BITS which can be used by the KVM userspace to get the number of GPA (guest physical address) bits supported for a Guest/VM. Signed-off-by: Anup Patel Reviewed-and-tested-by: Atish Patra --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1daa45268de2..469f05d69c8d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1131,6 +1131,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 +#define KVM_CAP_VM_GPA_BITS 207 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From ae16d059f8c9409eba0c412639def0494765b761 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 26 Oct 2021 18:22:44 +0200 Subject: mm/slub: Make object_err() static There are no callers outside of mm/slub.c anymore. Move freelist_corrupted() that calls object_err() to avoid a need for forward declaration. Signed-off-by: Vlastimil Babka Reviewed-by: Roman Gushchin --- include/linux/slub_def.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 0fa751b946fa..1ef68d4de9c0 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -156,9 +156,6 @@ static inline void sysfs_slab_release(struct kmem_cache *s) } #endif -void object_err(struct kmem_cache *s, struct page *page, - u8 *object, char *reason); - void *fixup_red_left(struct kmem_cache *s, void *p); static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, -- cgit v1.2.3 From d122019bf061cccc4583eb9ad40bf58c2fe517be Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Oct 2021 14:45:51 +0100 Subject: mm: Split slab into its own type Make struct slab independent of struct page. It still uses the underlying memory in struct page for storing slab-specific data, but slab and slub can now be weaned off using struct page directly. Some of the wrapper functions (slab_address() and slab_order()) still need to cast to struct folio, but this is a significant disentanglement. [ vbabka@suse.cz: Rebase on folios, use folio instead of page where possible. Do not duplicate flags field in struct slab, instead make the related accessors go through slab_folio(). For testing pfmemalloc use the folio_*_active flag accessors directly so the PageSlabPfmemalloc wrappers can be removed later. Make folio_slab() expect only folio_test_slab() == true folios and virt_to_slab() return NULL when folio_test_slab() == false. Move struct slab to mm/slab.h. Don't represent with struct slab pages that are not true slab pages, but just a compound page obtained directly rom page allocator (with large kmalloc() for SLUB and SLOB). ] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka Acked-by: Johannes Weiner Reviewed-by: Roman Gushchin --- include/linux/mm_types.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c3a6e6209600..1ae3537c7920 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -56,11 +56,11 @@ struct mem_cgroup; * in each subpage, but you may need to restore some of their values * afterwards. * - * SLUB uses cmpxchg_double() to atomically update its freelist and - * counters. That requires that freelist & counters be adjacent and - * double-word aligned. We align all struct pages to double-word - * boundaries, and ensure that 'freelist' is aligned within the - * struct. + * SLUB uses cmpxchg_double() to atomically update its freelist and counters. + * That requires that freelist & counters in struct slab be adjacent and + * double-word aligned. Because struct slab currently just reinterprets the + * bits of struct page, we align all struct pages to double-word boundaries, + * and ensure that 'freelist' is aligned within struct slab. */ #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) -- cgit v1.2.3 From 0b3eb091d5759479d44cb793fad2c51ea06bdcec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Oct 2021 14:45:56 +0100 Subject: mm: Convert check_heap_object() to use struct slab Ensure that we're not seeing a tail page inside __check_heap_object() by converting to a slab instead of a page. Take the opportunity to mark the slab as const since we're not modifying it. Also move the declaration of __check_heap_object() to mm/slab.h so it's not available to the wider kernel. [ vbabka@suse.cz: in check_heap_object() only convert to struct slab for actual PageSlab pages; use folio as intermediate step instead of page ] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka Reviewed-by: Roman Gushchin --- include/linux/slab.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 181045148b06..367366f1d1ff 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -189,14 +189,6 @@ bool kmem_valid_obj(void *object); void kmem_dump_obj(void *object); #endif -#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR -void __check_heap_object(const void *ptr, unsigned long n, struct page *page, - bool to_user); -#else -static inline void __check_heap_object(const void *ptr, unsigned long n, - struct page *page, bool to_user) { } -#endif - /* * Some archs want to perform DMA into kmalloc caches and need a guaranteed * alignment larger than the alignment of a 64-bit integer. -- cgit v1.2.3 From bb192ed9aa7191a5d65548f82c42b6750d65f569 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 3 Nov 2021 15:39:59 +0100 Subject: mm/slub: Convert most struct page to struct slab by spatch The majority of conversion from struct page to struct slab in SLUB internals can be delegated to a coccinelle semantic patch. This includes renaming of variables with 'page' in name to 'slab', and similar. Big thanks to Julia Lawall and Luis Chamberlain for help with coccinelle. // Options: --include-headers --no-includes --smpl-spacing include/linux/slub_def.h mm/slub.c // Note: needs coccinelle 1.1.1 to avoid breaking whitespace, and ocaml for the // embedded script // build list of functions to exclude from applying the next rule @initialize:ocaml@ @@ let ok_function p = not (List.mem (List.hd p).current_element ["nearest_obj";"obj_to_index";"objs_per_slab_page";"__slab_lock";"__slab_unlock";"free_nonslab_page";"kmalloc_large_node"]) // convert the type from struct page to struct page in all functions except the // list from previous rule // this also affects struct kmem_cache_cpu, but that's ok @@ position p : script:ocaml() { ok_function p }; @@ - struct page@p + struct slab // in struct kmem_cache_cpu, change the name from page to slab // the type was already converted by the previous rule @@ @@ struct kmem_cache_cpu { ... -struct slab *page; +struct slab *slab; ... } // there are many places that use c->page which is now c->slab after the // previous rule @@ struct kmem_cache_cpu *c; @@ -c->page +c->slab @@ @@ struct kmem_cache { ... - unsigned int cpu_partial_pages; + unsigned int cpu_partial_slabs; ... } @@ struct kmem_cache *s; @@ - s->cpu_partial_pages + s->cpu_partial_slabs @@ @@ static void - setup_page_debug( + setup_slab_debug( ...) {...} @@ @@ - setup_page_debug( + setup_slab_debug( ...); // for all functions (with exceptions), change any "struct slab *page" // parameter to "struct slab *slab" in the signature, and generally all // occurences of "page" to "slab" in the body - with some special cases. @@ identifier fn !~ "free_nonslab_page|obj_to_index|objs_per_slab_page|nearest_obj"; @@ fn(..., - struct slab *page + struct slab *slab ,...) { <... - page + slab ...> } // similar to previous but the param is called partial_page @@ identifier fn; @@ fn(..., - struct slab *partial_page + struct slab *partial_slab ,...) { <... - partial_page + partial_slab ...> } // similar to previous but for functions that take pointer to struct page ptr @@ identifier fn; @@ fn(..., - struct slab **ret_page + struct slab **ret_slab ,...) { <... - ret_page + ret_slab ...> } // functions converted by previous rules that were temporarily called using // slab_page(E) so we want to remove the wrapper now that they accept struct // slab ptr directly @@ identifier fn =~ "slab_free|do_slab_free"; expression E; @@ fn(..., - slab_page(E) + E ,...) // similar to previous but for another pattern @@ identifier fn =~ "slab_pad_check|check_object"; @@ fn(..., - folio_page(folio, 0) + slab ,...) // functions that were returning struct page ptr and now will return struct // slab ptr, including slab_page() wrapper removal @@ identifier fn =~ "allocate_slab|new_slab"; expression E; @@ static -struct slab * +struct slab * fn(...) { <... - slab_page(E) + E ...> } // rename any former struct page * declarations @@ @@ struct slab * ( - page + slab | - partial_page + partial_slab | - oldpage + oldslab ) ; // this has to be separate from previous rule as page and page2 appear at the // same line @@ @@ struct slab * -page2 +slab2 ; // similar but with initial assignment @@ expression E; @@ struct slab * ( - page + slab | - flush_page + flush_slab | - discard_page + slab_to_discard | - page_to_unfreeze + slab_to_unfreeze ) = E; // convert most of struct page to struct slab usage inside functions (with // exceptions), including specific variable renames @@ identifier fn !~ "nearest_obj|obj_to_index|objs_per_slab_page|__slab_(un)*lock|__free_slab|free_nonslab_page|kmalloc_large_node"; expression E; @@ fn(...) { <... ( - int pages; + int slabs; | - int pages = E; + int slabs = E; | - page + slab | - flush_page + flush_slab | - partial_page + partial_slab | - oldpage->pages + oldslab->slabs | - oldpage + oldslab | - unsigned int nr_pages; + unsigned int nr_slabs; | - nr_pages + nr_slabs | - unsigned int partial_pages = E; + unsigned int partial_slabs = E; | - partial_pages + partial_slabs ) ...> } // this has to be split out from the previous rule so that lines containing // multiple matching changes will be fully converted @@ identifier fn !~ "nearest_obj|obj_to_index|objs_per_slab_page|__slab_(un)*lock|__free_slab|free_nonslab_page|kmalloc_large_node"; @@ fn(...) { <... ( - slab->pages + slab->slabs | - pages + slabs | - page2 + slab2 | - discard_page + slab_to_discard | - page_to_unfreeze + slab_to_unfreeze ) ...> } // after we simply changed all occurences of page to slab, some usages need // adjustment for slab-specific functions, or use slab_page() wrapper @@ identifier fn !~ "nearest_obj|obj_to_index|objs_per_slab_page|__slab_(un)*lock|__free_slab|free_nonslab_page|kmalloc_large_node"; @@ fn(...) { <... ( - page_slab(slab) + slab | - kasan_poison_slab(slab) + kasan_poison_slab(slab_page(slab)) | - page_address(slab) + slab_address(slab) | - page_size(slab) + slab_size(slab) | - PageSlab(slab) + folio_test_slab(slab_folio(slab)) | - page_to_nid(slab) + slab_nid(slab) | - compound_order(slab) + slab_order(slab) ) ...> } Signed-off-by: Vlastimil Babka Reviewed-by: Roman Gushchin Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Julia Lawall Cc: Luis Chamberlain --- include/linux/slub_def.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 1ef68d4de9c0..00d99afe1c0e 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -48,9 +48,9 @@ enum stat_item { struct kmem_cache_cpu { void **freelist; /* Pointer to next available object */ unsigned long tid; /* Globally unique transaction id */ - struct page *page; /* The slab from which we are allocating */ + struct slab *slab; /* The slab from which we are allocating */ #ifdef CONFIG_SLUB_CPU_PARTIAL - struct page *partial; /* Partially allocated frozen slabs */ + struct slab *partial; /* Partially allocated frozen slabs */ #endif local_lock_t lock; /* Protects the fields above */ #ifdef CONFIG_SLUB_STATS @@ -100,7 +100,7 @@ struct kmem_cache { /* Number of per cpu partial objects to keep around */ unsigned int cpu_partial; /* Number of per cpu partial pages to keep around */ - unsigned int cpu_partial_pages; + unsigned int cpu_partial_slabs; #endif struct kmem_cache_order_objects oo; -- cgit v1.2.3 From c2092c12064a9728b2928979f88575cc1c2247fa Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 15 Nov 2021 16:55:15 +0100 Subject: mm/slub: Finish struct page to struct slab conversion Update comments mentioning pages to mention slabs where appropriate. Also some goto labels. Signed-off-by: Vlastimil Babka Reviewed-by: Roman Gushchin --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 00d99afe1c0e..8a9c2876ca89 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -99,7 +99,7 @@ struct kmem_cache { #ifdef CONFIG_SLUB_CPU_PARTIAL /* Number of per cpu partial objects to keep around */ unsigned int cpu_partial; - /* Number of per cpu partial pages to keep around */ + /* Number of per cpu partial slabs to keep around */ unsigned int cpu_partial_slabs; #endif struct kmem_cache_order_objects oo; -- cgit v1.2.3 From 40f3bf0cb04c91d33531b1b95788ad2f0e4062cf Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 2 Nov 2021 15:42:04 +0100 Subject: mm: Convert struct page to struct slab in functions used by other subsystems KASAN, KFENCE and memcg interact with SLAB or SLUB internals through functions nearest_obj(), obj_to_index() and objs_per_slab() that use struct page as parameter. This patch converts it to struct slab including all callers, through a coccinelle semantic patch. // Options: --include-headers --no-includes --smpl-spacing include/linux/slab_def.h include/linux/slub_def.h mm/slab.h mm/kasan/*.c mm/kfence/kfence_test.c mm/memcontrol.c mm/slab.c mm/slub.c // Note: needs coccinelle 1.1.1 to avoid breaking whitespace @@ @@ -objs_per_slab_page( +objs_per_slab( ... ) { ... } @@ @@ -objs_per_slab_page( +objs_per_slab( ... ) @@ identifier fn =~ "obj_to_index|objs_per_slab"; @@ fn(..., - const struct page *page + const struct slab *slab ,...) { <... ( - page_address(page) + slab_address(slab) | - page + slab ) ...> } @@ identifier fn =~ "nearest_obj"; @@ fn(..., - struct page *page + const struct slab *slab ,...) { <... ( - page_address(page) + slab_address(slab) | - page + slab ) ...> } @@ identifier fn =~ "nearest_obj|obj_to_index|objs_per_slab"; expression E; @@ fn(..., ( - slab_page(E) + E | - virt_to_page(E) + virt_to_slab(E) | - virt_to_head_page(E) + virt_to_slab(E) | - page + page_slab(page) ) ,...) Signed-off-by: Vlastimil Babka Reviewed-by: Andrey Konovalov Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Julia Lawall Cc: Luis Chamberlain Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Marco Elver Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Cc: --- include/linux/slab_def.h | 16 ++++++++-------- include/linux/slub_def.h | 18 +++++++++--------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 3aa5e1e73ab6..e24c9aff6fed 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -87,11 +87,11 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, +static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab, void *x) { - void *object = x - (x - page->s_mem) % cache->size; - void *last_object = page->s_mem + (cache->num - 1) * cache->size; + void *object = x - (x - slab->s_mem) % cache->size; + void *last_object = slab->s_mem + (cache->num - 1) * cache->size; if (unlikely(object > last_object)) return last_object; @@ -106,16 +106,16 @@ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, * reciprocal_divide(offset, cache->reciprocal_buffer_size) */ static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct page *page, void *obj) + const struct slab *slab, void *obj) { - u32 offset = (obj - page->s_mem); + u32 offset = (obj - slab->s_mem); return reciprocal_divide(offset, cache->reciprocal_buffer_size); } -static inline int objs_per_slab_page(const struct kmem_cache *cache, - const struct page *page) +static inline int objs_per_slab(const struct kmem_cache *cache, + const struct slab *slab) { - if (is_kfence_address(page_address(page))) + if (is_kfence_address(slab_address(slab))) return 1; return cache->num; } diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 8a9c2876ca89..33c5c0e3bd8d 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -158,11 +158,11 @@ static inline void sysfs_slab_release(struct kmem_cache *s) void *fixup_red_left(struct kmem_cache *s, void *p); -static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, +static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab, void *x) { - void *object = x - (x - page_address(page)) % cache->size; - void *last_object = page_address(page) + - (page->objects - 1) * cache->size; + void *object = x - (x - slab_address(slab)) % cache->size; + void *last_object = slab_address(slab) + + (slab->objects - 1) * cache->size; void *result = (unlikely(object > last_object)) ? last_object : object; result = fixup_red_left(cache, result); @@ -178,16 +178,16 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache, } static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct page *page, void *obj) + const struct slab *slab, void *obj) { if (is_kfence_address(obj)) return 0; - return __obj_to_index(cache, page_address(page), obj); + return __obj_to_index(cache, slab_address(slab), obj); } -static inline int objs_per_slab_page(const struct kmem_cache *cache, - const struct page *page) +static inline int objs_per_slab(const struct kmem_cache *cache, + const struct slab *slab) { - return page->objects; + return slab->objects; } #endif /* _LINUX_SLUB_DEF_H */ -- cgit v1.2.3 From 4b5f8d9a895ada8e0abb58ccd35d9fe229e3a595 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 2 Nov 2021 22:42:04 +0100 Subject: mm/memcg: Convert slab objcgs from struct page to struct slab page->memcg_data is used with MEMCG_DATA_OBJCGS flag only for slab pages so convert all the related infrastructure to struct slab. Also use struct folio instead of struct page when resolving object pointers. This is not just mechanistic changing of types and names. Now in mem_cgroup_from_obj() we use folio_test_slab() to decide if we interpret the folio as a real slab instead of a large kmalloc, instead of relying on MEMCG_DATA_OBJCGS bit that used to be checked in page_objcgs_check(). Similarly in memcg_slab_free_hook() where we can encounter kmalloc_large() pages (here the folio slab flag check is implied by virt_to_slab()). As a result, page_objcgs_check() can be dropped instead of converted. To avoid include cycles, move the inline definition of slab_objcgs() from memcontrol.h to mm/slab.h. Signed-off-by: Vlastimil Babka Reviewed-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: --- include/linux/memcontrol.h | 48 ---------------------------------------------- 1 file changed, 48 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0c5c403f4be6..e34112f6a369 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -536,45 +536,6 @@ static inline bool folio_memcg_kmem(struct folio *folio) return folio->memcg_data & MEMCG_DATA_KMEM; } -/* - * page_objcgs - get the object cgroups vector associated with a page - * @page: a pointer to the page struct - * - * Returns a pointer to the object cgroups vector associated with the page, - * or NULL. This function assumes that the page is known to have an - * associated object cgroups vector. It's not safe to call this function - * against pages, which might have an associated memory cgroup: e.g. - * kernel stack pages. - */ -static inline struct obj_cgroup **page_objcgs(struct page *page) -{ - unsigned long memcg_data = READ_ONCE(page->memcg_data); - - VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); - - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); -} - -/* - * page_objcgs_check - get the object cgroups vector associated with a page - * @page: a pointer to the page struct - * - * Returns a pointer to the object cgroups vector associated with the page, - * or NULL. This function is safe to use if the page can be directly associated - * with a memory cgroup. - */ -static inline struct obj_cgroup **page_objcgs_check(struct page *page) -{ - unsigned long memcg_data = READ_ONCE(page->memcg_data); - - if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) - return NULL; - - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); - - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); -} #else static inline bool folio_memcg_kmem(struct folio *folio) @@ -582,15 +543,6 @@ static inline bool folio_memcg_kmem(struct folio *folio) return false; } -static inline struct obj_cgroup **page_objcgs(struct page *page) -{ - return NULL; -} - -static inline struct obj_cgroup **page_objcgs_check(struct page *page) -{ - return NULL; -} #endif static inline bool PageMemcgKmem(struct page *page) -- cgit v1.2.3 From 6e48a966dfd18987fec9385566a67d36e2b5fc11 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Oct 2021 14:46:46 +0100 Subject: mm/kasan: Convert to struct folio and struct slab KASAN accesses some slab related struct page fields so we need to convert it to struct slab. Some places are a bit simplified thanks to kasan_addr_to_slab() encapsulating the PageSlab flag check through virt_to_slab(). When resolving object address to either a real slab or a large kmalloc, use struct folio as the intermediate type for testing the slab flag to avoid unnecessary implicit compound_head(). [ vbabka@suse.cz: use struct folio, adjust to differences in previous patches ] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka Reviewed-by: Andrey Konovalov Reviewed-by: Roman Gushchin Tested-by: Hyeongogn Yoo <42.hyeyoo@gmail.com> Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: --- include/linux/kasan.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d8783b682669..fb78108d694e 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -9,6 +9,7 @@ struct kmem_cache; struct page; +struct slab; struct vm_struct; struct task_struct; @@ -193,11 +194,11 @@ static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache) return 0; } -void __kasan_poison_slab(struct page *page); -static __always_inline void kasan_poison_slab(struct page *page) +void __kasan_poison_slab(struct slab *slab); +static __always_inline void kasan_poison_slab(struct slab *slab) { if (kasan_enabled()) - __kasan_poison_slab(page); + __kasan_poison_slab(slab); } void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object); @@ -322,7 +323,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache, slab_flags_t *flags) {} static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {} static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } -static inline void kasan_poison_slab(struct page *page) {} +static inline void kasan_poison_slab(struct slab *slab) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} static inline void kasan_poison_object_data(struct kmem_cache *cache, -- cgit v1.2.3 From c5e97ed154589524a1df4ae2be55c4cfdb0d0573 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Oct 2021 14:46:48 +0100 Subject: bootmem: Use page->index instead of page->freelist page->freelist is for the use of slab. Using page->index is the same set of bits as page->freelist, and by using an integer instead of a pointer, we can avoid casts. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka Acked-by: Johannes Weiner Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Cc: "H. Peter Anvin" --- include/linux/bootmem_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index 2bc8b1f69c93..cc35d010fa94 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -30,7 +30,7 @@ void put_page_bootmem(struct page *page); */ static inline void free_bootmem_page(struct page *page) { - unsigned long magic = (unsigned long)page->freelist; + unsigned long magic = page->index; /* * The reserve_bootmem_region sets the reserved flag on bootmem -- cgit v1.2.3 From 63cfc65753d604edc6cfe07e6fba2bf8ececb293 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 6 Jan 2022 00:11:49 +0200 Subject: net: dsa: don't enumerate dsa_switch and dsa_port bit fields using commas This is a cosmetic incremental fixup to commits 7787ff776398 ("net: dsa: merge all bools of struct dsa_switch into a single u32") bde82f389af1 ("net: dsa: merge all bools of struct dsa_port into a single u8") The desire to make this change was enunciated after posting these patches here: https://patchwork.kernel.org/project/netdevbpf/cover/20220105132141.2648876-1-vladimir.oltean@nxp.com/ but due to a slight timing overlap (message posted at 2:28 p.m. UTC, merge commit is at 2:46 p.m. UTC), that comment was missed and the changes were applied as-is. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 114 +++++++++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 5d0fec6db3ae..63c7f553f938 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -265,14 +265,16 @@ struct dsa_port { u8 stp_state; - u8 vlan_filtering:1, - /* Managed by DSA on user ports and by - * drivers on CPU and DSA ports - */ - learning:1, - lag_tx_enabled:1, - devlink_port_setup:1, - setup:1; + u8 vlan_filtering:1; + + /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ + u8 learning:1; + + u8 lag_tx_enabled:1; + + u8 devlink_port_setup:1; + + u8 setup:1; struct device_node *dn; unsigned int ageing_time; @@ -331,56 +333,52 @@ struct dsa_switch { struct dsa_switch_tree *dst; unsigned int index; - u32 setup:1, - /* Disallow bridge core from requesting - * different VLAN awareness settings on ports - * if not hardware-supported - */ - vlan_filtering_is_global:1, - /* Keep VLAN filtering enabled on ports not - * offloading any upper - */ - needs_standalone_vlan_filtering:1, - /* Pass .port_vlan_add and .port_vlan_del to - * drivers even for bridges that have - * vlan_filtering=0. All drivers should ideally - * set this (and then the option would get - * removed), but it is unknown whether this - * would break things or not. - */ - configure_vlan_while_not_filtering:1, - /* If the switch driver always programs the CPU - * port as egress tagged despite the VLAN - * configuration indicating otherwise, then - * setting @untag_bridge_pvid will force the - * DSA receive path to pop the bridge's - * default_pvid VLAN tagged frames to offer a - * consistent behavior between a - * vlan_filtering=0 and vlan_filtering=1 bridge - * device. - */ - untag_bridge_pvid:1, - /* Let DSA manage the FDB entries towards the - * CPU, based on the software bridge database. - */ - assisted_learning_on_cpu_port:1, - /* In case vlan_filtering_is_global is set, the - * VLAN awareness state should be retrieved - * from here and not from the per-port - * settings. - */ - vlan_filtering:1, - /* MAC PCS does not provide link state change - * interrupt, and requires polling. Flag passed - * on to PHYLINK. - */ - pcs_poll:1, - /* For switches that only have the MRU - * configurable. To ensure the configured MTU - * is not exceeded, normalization of MRU on all - * bridged interfaces is needed. - */ - mtu_enforcement_ingress:1; + u32 setup:1; + + /* Disallow bridge core from requesting different VLAN awareness + * settings on ports if not hardware-supported + */ + u32 vlan_filtering_is_global:1; + + /* Keep VLAN filtering enabled on ports not offloading any upper */ + u32 needs_standalone_vlan_filtering:1; + + /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges + * that have vlan_filtering=0. All drivers should ideally set this (and + * then the option would get removed), but it is unknown whether this + * would break things or not. + */ + u32 configure_vlan_while_not_filtering:1; + + /* If the switch driver always programs the CPU port as egress tagged + * despite the VLAN configuration indicating otherwise, then setting + * @untag_bridge_pvid will force the DSA receive path to pop the + * bridge's default_pvid VLAN tagged frames to offer a consistent + * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge + * device. + */ + u32 untag_bridge_pvid:1; + + /* Let DSA manage the FDB entries towards the + * CPU, based on the software bridge database. + */ + u32 assisted_learning_on_cpu_port:1; + + /* In case vlan_filtering_is_global is set, the VLAN awareness state + * should be retrieved from here and not from the per-port settings. + */ + u32 vlan_filtering:1; + + /* MAC PCS does not provide link state change interrupt, and requires + * polling. Flag passed on to PHYLINK. + */ + u32 pcs_poll:1; + + /* For switches that only have the MRU configurable. To ensure the + * configured MTU is not exceeded, normalization of MRU on all bridged + * interfaces is needed. + */ + u32 mtu_enforcement_ingress:1; /* Listener for switch fabric events */ struct notifier_block nb; -- cgit v1.2.3 From 1b26d364e4e9bd6540a8e7bcaf50e7f35041feb5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 6 Jan 2022 00:11:50 +0200 Subject: net: dsa: warn about dsa_port and dsa_switch bit fields being non atomic As discussed during review here: https://patchwork.kernel.org/project/netdevbpf/patch/20220105132141.2648876-3-vladimir.oltean@nxp.com/ we should inform developers about pitfalls of concurrent access to the boolean properties of dsa_switch and dsa_port, now that they've been converted to bit fields. No other measure than a comment needs to be taken, since the code paths that update these bit fields are not concurrent with each other. Suggested-by: Florian Fainelli Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 63c7f553f938..57b3e4e7413b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -265,6 +265,10 @@ struct dsa_port { u8 stp_state; + /* Warning: the following bit fields are not atomic, and updating them + * can only be done from code paths where concurrency is not possible + * (probe time or under rtnl_lock). + */ u8 vlan_filtering:1; /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ @@ -333,6 +337,10 @@ struct dsa_switch { struct dsa_switch_tree *dst; unsigned int index; + /* Warning: the following bit fields are not atomic, and updating them + * can only be done from code paths where concurrency is not possible + * (probe time or under rtnl_lock). + */ u32 setup:1; /* Disallow bridge core from requesting different VLAN awareness -- cgit v1.2.3 From 007747a984ea5e895b7d8b056b24ebf431e1e71d Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Wed, 5 Jan 2022 11:33:26 +0100 Subject: net: fix SOF_TIMESTAMPING_BIND_PHC to work with multiple sockets When multiple sockets using the SOF_TIMESTAMPING_BIND_PHC flag received a packet with a hardware timestamp (e.g. multiple PTP instances in different PTP domains using the UDPv4/v6 multicast or L2 transport), the timestamps received on some sockets were corrupted due to repeated conversion of the same timestamp (by the same or different vclocks). Fix ptp_convert_timestamp() to not modify the shared skb timestamp and return the converted timestamp as a ktime_t instead. If the conversion fails, return 0 to not confuse the application with timestamps corresponding to an unexpected PHC. Fixes: d7c088265588 ("net: socket: support hardware timestamp conversion to PHC bound") Signed-off-by: Miroslav Lichvar Cc: Yangbo Lu Cc: Richard Cochran Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 2e5565067355..554454cb8693 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -351,15 +351,17 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); * * @hwtstamps: skb_shared_hwtstamps structure pointer * @vclock_index: phc index of ptp vclock. + * + * Returns converted timestamp, or 0 on error. */ -void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, - int vclock_index); +ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, + int vclock_index); #else static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { return 0; } -static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, - int vclock_index) -{ } +static inline ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, + int vclock_index) +{ return 0; } #endif -- cgit v1.2.3 From eac1b93c14d645ef147b049ace0d5230df755548 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Wed, 5 Jan 2022 02:48:38 -0800 Subject: gro: add ability to control gro max packet size Eric Dumazet suggested to allow users to modify max GRO packet size. We have seen GRO being disabled by users of appliances (such as wifi access points) because of claimed bufferbloat issues, or some work arounds in sch_cake, to split GRO/GSO packets. Instead of disabling GRO completely, one can chose to limit the maximum packet size of GRO packets, depending on their latency constraints. This patch adds a per device gro_max_size attribute that can be changed with ip link command. ip link set dev eth0 gro_max_size 16000 Suggested-by: Eric Dumazet Signed-off-by: Coco Li Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 +++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6f99c8f51b60..3213c7227b59 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1942,6 +1942,8 @@ enum netdev_ml_priv_type { * dev->addr_list_lock. * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. + * @gro_max_size: Maximum size of aggregated packet in generic + * receive offload (GRO) * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. @@ -2131,6 +2133,8 @@ struct net_device { struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; int napi_defer_hard_irqs; +#define GRO_MAX_SIZE 65536 + unsigned int gro_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -4806,6 +4810,13 @@ static inline void netif_set_gso_max_segs(struct net_device *dev, WRITE_ONCE(dev->gso_max_segs, segs); } +static inline void netif_set_gro_max_size(struct net_device *dev, + unsigned int size) +{ + /* This pairs with the READ_ONCE() in skb_gro_receive() */ + WRITE_ONCE(dev->gro_max_size, size); +} + static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, int pulled_hlen, u16 mac_offset, int mac_len) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4ac53b30b6dc..6218f93f5c1a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -347,6 +347,7 @@ enum { */ IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_BUS_NAME, + IFLA_GRO_MAX_SIZE, __IFLA_MAX }; -- cgit v1.2.3 From 72279d17df54d5e4e7910b39c61a3f3464e36633 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 5 Jan 2022 14:59:53 -0800 Subject: Bluetooth: hci_event: Rework hci_inquiry_result_with_rssi_evt This rework the handling of hci_inquiry_result_with_rssi_evt to not use a union to represent the different inquiry responses. Signed-off-by: Luiz Augusto von Dentz Tested-by: Soenke Huster Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e2b06bb79e2e..35c073d44ec5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2233,11 +2233,7 @@ struct inquiry_info_rssi_pscan { } __packed; struct hci_ev_inquiry_result_rssi { __u8 num; - struct inquiry_info_rssi info[]; -} __packed; -struct hci_ev_inquiry_result_rssi_pscan { - __u8 num; - struct inquiry_info_rssi_pscan info[]; + __u8 data[]; } __packed; #define HCI_EV_REMOTE_EXT_FEATURES 0x23 -- cgit v1.2.3 From 3809fe479861194e310c23ed48b010c7c0f72d22 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 16 Dec 2021 10:21:57 +0100 Subject: HID: address kernel-doc warnings The command ./scripts/kernel-doc -none include/linux/hid.h reports: include/linux/hid.h:818: warning: cannot understand function prototype: 'struct hid_ll_driver ' include/linux/hid.h:1135: warning: expecting prototype for hid_may_wakeup(). Prototype was for hid_hw_may_wakeup() instead Address those kernel-doc warnings. Signed-off-by: Lukas Bulwahn Signed-off-by: Jiri Kosina --- include/linux/hid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index f453be385bd4..aaf89a8a836c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -788,7 +788,7 @@ struct hid_driver { container_of(pdrv, struct hid_driver, driver) /** - * hid_ll_driver - low level driver callbacks + * struct hid_ll_driver - low level driver callbacks * @start: called on probe to start the device * @stop: called on remove * @open: called by input layer on open @@ -1158,7 +1158,7 @@ static inline int hid_hw_idle(struct hid_device *hdev, int report, int idle, } /** - * hid_may_wakeup - return if the hid device may act as a wakeup source during system-suspend + * hid_hw_may_wakeup - return if the hid device may act as a wakeup source during system-suspend * * @hdev: hid device */ -- cgit v1.2.3 From 36dacddbf0bdba86cd00f066b4d724157eeb63f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20M=C3=BCller?= Date: Wed, 5 Jan 2022 17:38:47 +0100 Subject: lib/raid6: Use strict priority ranking for pq gen() benchmarking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On x86_64, currently 3 variants of AVX512, 3 variants of AVX2 and 3 variants of SSE2 are benchmarked on initialization, taking between 144-153 jiffies. Testing across a hardware pool of various generations of intel cpus I could not find a single case where SSE2 won over AVX2 or AVX512. There are cases where AVX2 wins over AVX512 however. Change "prefer" into an integer priority field (similar to how recov selection works) to have more than one ranking level available, which is backwards compatible with existing behavior. Give AVX2/512 variants higher priority over SSE2 in order to skip SSE testing when AVX is available. in a AVX2/x86_64/HZ=250 case this saves in the order of 200ms of initialization time. Signed-off-by: Dirk Müller Acked-by: Paul Menzel Signed-off-by: Song Liu --- include/linux/raid/pq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 154e954b711d..d6e5a1feb947 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -81,7 +81,7 @@ struct raid6_calls { void (*xor_syndrome)(int, int, int, size_t, void **); int (*valid)(void); /* Returns 1 if this routine set is usable */ const char *name; /* Name of this routine set */ - int prefer; /* Has special performance attribute */ + int priority; /* Relative priority ranking if non-zero */ }; /* Selected algorithm */ -- cgit v1.2.3 From 07f910f9b7295b6a28b337fedb56e612684c5659 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Oct 2021 14:46:50 +0100 Subject: mm: Remove slab from struct page All members of struct slab can now be removed from struct page. This shrinks the definition of struct page by 30 LOC, making it easier to understand. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka --- include/linux/mm_types.h | 28 ---------------------------- include/linux/page-flags.h | 37 ------------------------------------- 2 files changed, 65 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1ae3537c7920..646f3ed4f6df 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -118,31 +118,6 @@ struct page { atomic_long_t pp_frag_count; }; }; - struct { /* slab, slob and slub */ - union { - struct list_head slab_list; - struct { /* Partial pages */ - struct page *next; -#ifdef CONFIG_64BIT - int pages; /* Nr of pages left */ -#else - short int pages; -#endif - }; - }; - struct kmem_cache *slab_cache; /* not slob */ - /* Double-word boundary */ - void *freelist; /* first free object */ - union { - void *s_mem; /* slab: first object */ - unsigned long counters; /* SLUB */ - struct { /* SLUB */ - unsigned inuse:16; - unsigned objects:15; - unsigned frozen:1; - }; - }; - }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ @@ -206,9 +181,6 @@ struct page { * which are currently stored here. */ unsigned int page_type; - - unsigned int active; /* SLAB */ - int units; /* SLOB */ }; /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b5f14d581113..1b08e33265fa 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -909,43 +909,6 @@ extern bool is_free_buddy_page(struct page *page); __PAGEFLAG(Isolated, isolated, PF_ANY); -/* - * If network-based swap is enabled, sl*b must keep track of whether pages - * were allocated from pfmemalloc reserves. - */ -static inline int PageSlabPfmemalloc(struct page *page) -{ - VM_BUG_ON_PAGE(!PageSlab(page), page); - return PageActive(page); -} - -/* - * A version of PageSlabPfmemalloc() for opportunistic checks where the page - * might have been freed under us and not be a PageSlab anymore. - */ -static inline int __PageSlabPfmemalloc(struct page *page) -{ - return PageActive(page); -} - -static inline void SetPageSlabPfmemalloc(struct page *page) -{ - VM_BUG_ON_PAGE(!PageSlab(page), page); - SetPageActive(page); -} - -static inline void __ClearPageSlabPfmemalloc(struct page *page) -{ - VM_BUG_ON_PAGE(!PageSlab(page), page); - __ClearPageActive(page); -} - -static inline void ClearPageSlabPfmemalloc(struct page *page) -{ - VM_BUG_ON_PAGE(!PageSlab(page), page); - ClearPageActive(page); -} - #ifdef CONFIG_MMU #define __PG_MLOCKED (1UL << PG_mlocked) #else -- cgit v1.2.3 From 703f7066f40599c290babdb79dd61319264987e9 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 7 Dec 2021 13:17:33 +0100 Subject: random: remove unused irq_flags argument from add_interrupt_randomness() Since commit ee3e00e9e7101 ("random: use registers from interrupted code for CPU's w/o a cycle counter") the irq_flags argument is no longer used. Remove unused irq_flags. Cc: Borislav Petkov Cc: Dave Hansen Cc: Dexuan Cui Cc: H. Peter Anvin Cc: Haiyang Zhang Cc: Ingo Molnar Cc: K. Y. Srinivasan Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Wei Liu Cc: linux-hyperv@vger.kernel.org Cc: x86@kernel.org Signed-off-by: Sebastian Andrzej Siewior Acked-by: Wei Liu Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index f45b8be3e3c4..c45b2693e51f 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -35,7 +35,7 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; +extern void add_interrupt_randomness(int irq) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); extern int wait_for_random_bytes(void); -- cgit v1.2.3 From 6048fdcc5f269c7f31d774c295ce59081b36e6f9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 22 Dec 2021 14:56:58 +0100 Subject: lib/crypto: blake2s: include as built-in In preparation for using blake2s in the RNG, we change the way that it is wired-in to the build system. Instead of using ifdefs to select the right symbol, we use weak symbols. And because ARM doesn't need the generic implementation, we make the generic one default only if an arch library doesn't need it already, and then have arch libraries that do need it opt-in. So that the arch libraries can remain tristate rather than bool, we then split the shash part from the glue code. Acked-by: Herbert Xu Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Cc: Masahiro Yamada Cc: linux-kbuild@vger.kernel.org Cc: linux-crypto@vger.kernel.org Signed-off-by: Jason A. Donenfeld --- include/crypto/internal/blake2s.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 8e50d487500f..d39cfa0d333e 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -11,11 +11,11 @@ #include #include -void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, +void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, size_t nblocks, const u32 inc); -void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, - size_t nblocks, const u32 inc); +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc); bool blake2s_selftest(void); -- cgit v1.2.3 From 96562f286884e2db89c74215b199a1084b5fb7f7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 31 Dec 2021 09:26:08 +0100 Subject: random: early initialization of ChaCha constants Previously, the ChaCha constants for the primary pool were only initialized in crng_initialize_primary(), called by rand_initialize(). However, some randomness is actually extracted from the primary pool beforehand, e.g. by kmem_cache_create(). Therefore, statically initialize the ChaCha constants for the primary pool. Cc: Herbert Xu Cc: "David S. Miller" Cc: Signed-off-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- include/crypto/chacha.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index dabaee698718..b3ea73b81944 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -47,12 +47,19 @@ static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) hchacha_block_generic(state, out, nrounds); } +enum chacha_constants { /* expand 32-byte k */ + CHACHA_CONSTANT_EXPA = 0x61707865U, + CHACHA_CONSTANT_ND_3 = 0x3320646eU, + CHACHA_CONSTANT_2_BY = 0x79622d32U, + CHACHA_CONSTANT_TE_K = 0x6b206574U +}; + static inline void chacha_init_consts(u32 *state) { - state[0] = 0x61707865; /* "expa" */ - state[1] = 0x3320646e; /* "nd 3" */ - state[2] = 0x79622d32; /* "2-by" */ - state[3] = 0x6b206574; /* "te k" */ + state[0] = CHACHA_CONSTANT_EXPA; + state[1] = CHACHA_CONSTANT_ND_3; + state[2] = CHACHA_CONSTANT_2_BY; + state[3] = CHACHA_CONSTANT_TE_K; } void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv); -- cgit v1.2.3 From 79b60ca83b6fa63ef307d2edcc77ee6581da8971 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 12 Dec 2021 14:51:27 +0200 Subject: net/mlx5: Introduce API for bulk request and release of IRQs Currently IRQs are requested one by one. To balance spreading IRQs among cpus using such scheme requires remembering cpu mask for the cpus used for a given device. This complicates the IRQ allocation scheme in subsequent patch. Hence, prepare the code for bulk IRQs allocation. This enables spreading IRQs among cpus in subsequent patch. Signed-off-by: Shay Drory Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index ea3ff5a8ced3..3705a382276b 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -9,13 +9,13 @@ #define MLX5_NUM_SPARE_EQE (0x80) struct mlx5_eq; +struct mlx5_irq; struct mlx5_core_dev; struct mlx5_eq_param { - u8 irq_index; int nent; u64 mask[4]; - cpumask_var_t affinity; + struct mlx5_irq *irq; }; struct mlx5_eq * -- cgit v1.2.3 From 91a760b26926265a60c77ddf016529bcf3e17a04 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 6 Jan 2022 21:20:20 +0800 Subject: net: bpf: Handle return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() The return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() in __inet_bind() is not handled properly. While the return value is non-zero, it will set inet_saddr and inet_rcv_saddr to 0 and exit: err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk); if (err) { inet->inet_saddr = inet->inet_rcv_saddr = 0; goto out_release_sock; } Let's take UDP for example and see what will happen. For UDP socket, it will be added to 'udp_prot.h.udp_table->hash' and 'udp_prot.h.udp_table->hash2' after the sk->sk_prot->get_port() called success. If 'inet->inet_rcv_saddr' is specified here, then 'sk' will be in the 'hslot2' of 'hash2' that it don't belong to (because inet_saddr is changed to 0), and UDP packet received will not be passed to this sock. If 'inet->inet_rcv_saddr' is not specified here, the sock will work fine, as it can receive packet properly, which is wired, as the 'bind()' is already failed. To undo the get_port() operation, introduce the 'put_port' field for 'struct proto'. For TCP proto, it is inet_put_port(); For UDP proto, it is udp_lib_unhash(); For icmp proto, it is ping_unhash(). Therefore, after sys_bind() fail caused by BPF_CGROUP_RUN_PROG_INET4_POST_BIND(), it will be unbinded, which means that it can try to be binded to another port. Signed-off-by: Menglong Dong Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220106132022.3470772-2-imagedong@tencent.com --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 7b4b4237e6e0..ff9b508d9c5f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1209,6 +1209,7 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); + void (*put_port)(struct sock *sk); #ifdef CONFIG_BPF_SYSCALL int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, -- cgit v1.2.3 From 51d04bcfb82a005d38b6f1011dc04a810d359aea Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 20 Dec 2021 19:33:18 +0000 Subject: dt-bindings: clk/ingenic: Add MDMA and BDMA clocks The Ingenic JZ4760 and JZ4770 both have an extra DMA core named BDMA dedicated to the NAND and BCH controller, but which can also do memory-to-memory transfers. The JZ4760 additionally has a DMA core named MDMA dedicated to memory-to-memory transfers. The programming manual for the JZ4770 does have a bit for a MDMA clock, but does not seem to have the hardware wired in. Add macros for the MDMA and BDMA clocks to the dt-bindings include files, so that they can be used within Device Tree files. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20211220193319.114974-2-paul@crapouillou.net Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/ingenic,jz4760-cgu.h | 2 ++ include/dt-bindings/clock/ingenic,jz4770-cgu.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/ingenic,jz4760-cgu.h b/include/dt-bindings/clock/ingenic,jz4760-cgu.h index 4bb2e19c4743..9fb04ebac6de 100644 --- a/include/dt-bindings/clock/ingenic,jz4760-cgu.h +++ b/include/dt-bindings/clock/ingenic,jz4760-cgu.h @@ -50,5 +50,7 @@ #define JZ4760_CLK_LPCLK_DIV 41 #define JZ4760_CLK_TVE 42 #define JZ4760_CLK_LPCLK 43 +#define JZ4760_CLK_MDMA 44 +#define JZ4760_CLK_BDMA 45 #endif /* __DT_BINDINGS_CLOCK_JZ4760_CGU_H__ */ diff --git a/include/dt-bindings/clock/ingenic,jz4770-cgu.h b/include/dt-bindings/clock/ingenic,jz4770-cgu.h index d68a7695a1f8..0b475e8ae321 100644 --- a/include/dt-bindings/clock/ingenic,jz4770-cgu.h +++ b/include/dt-bindings/clock/ingenic,jz4770-cgu.h @@ -54,5 +54,6 @@ #define JZ4770_CLK_OTG_PHY 45 #define JZ4770_CLK_EXT512 46 #define JZ4770_CLK_RTC 47 +#define JZ4770_CLK_BDMA 48 #endif /* __DT_BINDINGS_CLOCK_JZ4770_CGU_H__ */ -- cgit v1.2.3 From 3663f26b389b3951426971b44bb9312fdff0efec Mon Sep 17 00:00:00 2001 From: Ajit Kumar Pandey Date: Sun, 12 Dec 2021 23:35:24 +0530 Subject: drivers: acpi: acpi_apd: Remove unused device property "is-rv" Initially "is-rv" device property is added for 48MHz fixed clock support on Raven or RV architecture. It's unused now as we moved to pci device_id based selection to extend such support on other architectures. This change removed unused code from acpi driver. Signed-off-by: Ajit Kumar Pandey Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20211212180527.1641362-3-AjitKumar.Pandey@amd.com Signed-off-by: Stephen Boyd --- include/linux/platform_data/clk-fch.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/clk-fch.h b/include/linux/platform_data/clk-fch.h index b9f682459f08..850ca776156d 100644 --- a/include/linux/platform_data/clk-fch.h +++ b/include/linux/platform_data/clk-fch.h @@ -12,7 +12,6 @@ struct fch_clk_data { void __iomem *base; - u32 is_rv; }; #endif /* __CLK_FCH_H */ -- cgit v1.2.3 From 7fdb98e8a768b3ccc05494d3ea4436047f512b9d Mon Sep 17 00:00:00 2001 From: Ajit Kumar Pandey Date: Sun, 12 Dec 2021 23:35:25 +0530 Subject: ACPI: APD: Add a fmw property clk-name Add a new device property to fetch clk-name from firmware. Signed-off-by: Ajit Kumar Pandey Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20211212180527.1641362-4-AjitKumar.Pandey@amd.com Signed-off-by: Stephen Boyd --- include/linux/platform_data/clk-fch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/clk-fch.h b/include/linux/platform_data/clk-fch.h index 850ca776156d..11a2a23fd9b2 100644 --- a/include/linux/platform_data/clk-fch.h +++ b/include/linux/platform_data/clk-fch.h @@ -12,6 +12,7 @@ struct fch_clk_data { void __iomem *base; + char *name; }; #endif /* __CLK_FCH_H */ -- cgit v1.2.3 From 4470c830f9791203e7514c6b4d3a0df194e3ee0d Mon Sep 17 00:00:00 2001 From: Sam Shih Date: Fri, 17 Dec 2021 20:11:47 +0800 Subject: clk: mediatek: add mt7986 clock IDs Add MT7986 clock dt-bindings, include topckgen, apmixedsys, infracfg, and ethernet subsystem clocks. Signed-off-by: Sam Shih Acked-by: Rob Herring Link: https://lore.kernel.org/r/20211217121148.6753-3-sam.shih@mediatek.com Reviewed-by: Ryder Lee Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/mt7986-clk.h | 169 +++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 include/dt-bindings/clock/mt7986-clk.h (limited to 'include') diff --git a/include/dt-bindings/clock/mt7986-clk.h b/include/dt-bindings/clock/mt7986-clk.h new file mode 100644 index 000000000000..5a9b169324b0 --- /dev/null +++ b/include/dt-bindings/clock/mt7986-clk.h @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021 MediaTek Inc. + * Author: Sam Shih + */ + +#ifndef _DT_BINDINGS_CLK_MT7986_H +#define _DT_BINDINGS_CLK_MT7986_H + +/* APMIXEDSYS */ + +#define CLK_APMIXED_ARMPLL 0 +#define CLK_APMIXED_NET2PLL 1 +#define CLK_APMIXED_MMPLL 2 +#define CLK_APMIXED_SGMPLL 3 +#define CLK_APMIXED_WEDMCUPLL 4 +#define CLK_APMIXED_NET1PLL 5 +#define CLK_APMIXED_MPLL 6 +#define CLK_APMIXED_APLL2 7 + +/* TOPCKGEN */ + +#define CLK_TOP_XTAL 0 +#define CLK_TOP_XTAL_D2 1 +#define CLK_TOP_RTC_32K 2 +#define CLK_TOP_RTC_32P7K 3 +#define CLK_TOP_MPLL_D2 4 +#define CLK_TOP_MPLL_D4 5 +#define CLK_TOP_MPLL_D8 6 +#define CLK_TOP_MPLL_D8_D2 7 +#define CLK_TOP_MPLL_D3_D2 8 +#define CLK_TOP_MMPLL_D2 9 +#define CLK_TOP_MMPLL_D4 10 +#define CLK_TOP_MMPLL_D8 11 +#define CLK_TOP_MMPLL_D8_D2 12 +#define CLK_TOP_MMPLL_D3_D8 13 +#define CLK_TOP_MMPLL_U2PHY 14 +#define CLK_TOP_APLL2_D4 15 +#define CLK_TOP_NET1PLL_D4 16 +#define CLK_TOP_NET1PLL_D5 17 +#define CLK_TOP_NET1PLL_D5_D2 18 +#define CLK_TOP_NET1PLL_D5_D4 19 +#define CLK_TOP_NET1PLL_D8_D2 20 +#define CLK_TOP_NET1PLL_D8_D4 21 +#define CLK_TOP_NET2PLL_D4 22 +#define CLK_TOP_NET2PLL_D4_D2 23 +#define CLK_TOP_NET2PLL_D3_D2 24 +#define CLK_TOP_WEDMCUPLL_D5_D2 25 +#define CLK_TOP_NFI1X_SEL 26 +#define CLK_TOP_SPINFI_SEL 27 +#define CLK_TOP_SPI_SEL 28 +#define CLK_TOP_SPIM_MST_SEL 29 +#define CLK_TOP_UART_SEL 30 +#define CLK_TOP_PWM_SEL 31 +#define CLK_TOP_I2C_SEL 32 +#define CLK_TOP_PEXTP_TL_SEL 33 +#define CLK_TOP_EMMC_250M_SEL 34 +#define CLK_TOP_EMMC_416M_SEL 35 +#define CLK_TOP_F_26M_ADC_SEL 36 +#define CLK_TOP_DRAMC_SEL 37 +#define CLK_TOP_DRAMC_MD32_SEL 38 +#define CLK_TOP_SYSAXI_SEL 39 +#define CLK_TOP_SYSAPB_SEL 40 +#define CLK_TOP_ARM_DB_MAIN_SEL 41 +#define CLK_TOP_ARM_DB_JTSEL 42 +#define CLK_TOP_NETSYS_SEL 43 +#define CLK_TOP_NETSYS_500M_SEL 44 +#define CLK_TOP_NETSYS_MCU_SEL 45 +#define CLK_TOP_NETSYS_2X_SEL 46 +#define CLK_TOP_SGM_325M_SEL 47 +#define CLK_TOP_SGM_REG_SEL 48 +#define CLK_TOP_A1SYS_SEL 49 +#define CLK_TOP_CONN_MCUSYS_SEL 50 +#define CLK_TOP_EIP_B_SEL 51 +#define CLK_TOP_PCIE_PHY_SEL 52 +#define CLK_TOP_USB3_PHY_SEL 53 +#define CLK_TOP_F26M_SEL 54 +#define CLK_TOP_AUD_L_SEL 55 +#define CLK_TOP_A_TUNER_SEL 56 +#define CLK_TOP_U2U3_SEL 57 +#define CLK_TOP_U2U3_SYS_SEL 58 +#define CLK_TOP_U2U3_XHCI_SEL 59 +#define CLK_TOP_DA_U2_REFSEL 60 +#define CLK_TOP_DA_U2_CK_1P_SEL 61 +#define CLK_TOP_AP2CNN_HOST_SEL 62 +#define CLK_TOP_JTAG 63 + +/* INFRACFG */ + +#define CLK_INFRA_SYSAXI_D2 0 +#define CLK_INFRA_UART0_SEL 1 +#define CLK_INFRA_UART1_SEL 2 +#define CLK_INFRA_UART2_SEL 3 +#define CLK_INFRA_SPI0_SEL 4 +#define CLK_INFRA_SPI1_SEL 5 +#define CLK_INFRA_PWM1_SEL 6 +#define CLK_INFRA_PWM2_SEL 7 +#define CLK_INFRA_PWM_BSEL 8 +#define CLK_INFRA_PCIE_SEL 9 +#define CLK_INFRA_GPT_STA 10 +#define CLK_INFRA_PWM_HCK 11 +#define CLK_INFRA_PWM_STA 12 +#define CLK_INFRA_PWM1_CK 13 +#define CLK_INFRA_PWM2_CK 14 +#define CLK_INFRA_CQ_DMA_CK 15 +#define CLK_INFRA_EIP97_CK 16 +#define CLK_INFRA_AUD_BUS_CK 17 +#define CLK_INFRA_AUD_26M_CK 18 +#define CLK_INFRA_AUD_L_CK 19 +#define CLK_INFRA_AUD_AUD_CK 20 +#define CLK_INFRA_AUD_EG2_CK 21 +#define CLK_INFRA_DRAMC_26M_CK 22 +#define CLK_INFRA_DBG_CK 23 +#define CLK_INFRA_AP_DMA_CK 24 +#define CLK_INFRA_SEJ_CK 25 +#define CLK_INFRA_SEJ_13M_CK 26 +#define CLK_INFRA_THERM_CK 27 +#define CLK_INFRA_I2C0_CK 28 +#define CLK_INFRA_UART0_CK 29 +#define CLK_INFRA_UART1_CK 30 +#define CLK_INFRA_UART2_CK 31 +#define CLK_INFRA_NFI1_CK 32 +#define CLK_INFRA_SPINFI1_CK 33 +#define CLK_INFRA_NFI_HCK_CK 34 +#define CLK_INFRA_SPI0_CK 35 +#define CLK_INFRA_SPI1_CK 36 +#define CLK_INFRA_SPI0_HCK_CK 37 +#define CLK_INFRA_SPI1_HCK_CK 38 +#define CLK_INFRA_FRTC_CK 39 +#define CLK_INFRA_MSDC_CK 40 +#define CLK_INFRA_MSDC_HCK_CK 41 +#define CLK_INFRA_MSDC_133M_CK 42 +#define CLK_INFRA_MSDC_66M_CK 43 +#define CLK_INFRA_ADC_26M_CK 44 +#define CLK_INFRA_ADC_FRC_CK 45 +#define CLK_INFRA_FBIST2FPC_CK 46 +#define CLK_INFRA_IUSB_133_CK 47 +#define CLK_INFRA_IUSB_66M_CK 48 +#define CLK_INFRA_IUSB_SYS_CK 49 +#define CLK_INFRA_IUSB_CK 50 +#define CLK_INFRA_IPCIE_CK 51 +#define CLK_INFRA_IPCIE_PIPE_CK 52 +#define CLK_INFRA_IPCIER_CK 53 +#define CLK_INFRA_IPCIEB_CK 54 +#define CLK_INFRA_TRNG_CK 55 + +/* SGMIISYS_0 */ + +#define CLK_SGMII0_TX250M_EN 0 +#define CLK_SGMII0_RX250M_EN 1 +#define CLK_SGMII0_CDR_REF 2 +#define CLK_SGMII0_CDR_FB 3 + +/* SGMIISYS_1 */ + +#define CLK_SGMII1_TX250M_EN 0 +#define CLK_SGMII1_RX250M_EN 1 +#define CLK_SGMII1_CDR_REF 2 +#define CLK_SGMII1_CDR_FB 3 + +/* ETHSYS */ + +#define CLK_ETH_FE_EN 0 +#define CLK_ETH_GP2_EN 1 +#define CLK_ETH_GP1_EN 2 +#define CLK_ETH_WOCPU1_EN 3 +#define CLK_ETH_WOCPU0_EN 4 + +#endif /* _DT_BINDINGS_CLK_MT7986_H */ -- cgit v1.2.3 From 850cba069c266d6f31b81c5a199052a3482a63fc Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 31 Oct 2021 12:58:05 +0000 Subject: cachefiles: Delete the cachefiles driver pending rewrite Delete the code from the cachefiles driver to make it easier to rewrite and resubmit in a logical manner. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819577641.215744.12718114397770666596.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906883770.143852.4149714614981373410.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967076066.1823006.7175712134577687753.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021483619.640689.7586546280515844702.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/cachefiles.h | 321 -------------------------------------- 1 file changed, 321 deletions(-) delete mode 100644 include/trace/events/cachefiles.h (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h deleted file mode 100644 index 920b6a303d60..000000000000 --- a/include/trace/events/cachefiles.h +++ /dev/null @@ -1,321 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* CacheFiles tracepoints - * - * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM cachefiles - -#if !defined(_TRACE_CACHEFILES_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_CACHEFILES_H - -#include - -/* - * Define enums for tracing information. - */ -#ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY -#define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY - -enum cachefiles_obj_ref_trace { - cachefiles_obj_put_wait_retry = fscache_obj_ref__nr_traces, - cachefiles_obj_put_wait_timeo, - cachefiles_obj_ref__nr_traces -}; - -#endif - -/* - * Define enum -> string mappings for display. - */ -#define cachefiles_obj_kill_traces \ - EM(FSCACHE_OBJECT_IS_STALE, "stale") \ - EM(FSCACHE_OBJECT_NO_SPACE, "no_space") \ - EM(FSCACHE_OBJECT_WAS_RETIRED, "was_retired") \ - E_(FSCACHE_OBJECT_WAS_CULLED, "was_culled") - -#define cachefiles_obj_ref_traces \ - EM(fscache_obj_get_add_to_deps, "GET add_to_deps") \ - EM(fscache_obj_get_queue, "GET queue") \ - EM(fscache_obj_put_alloc_fail, "PUT alloc_fail") \ - EM(fscache_obj_put_attach_fail, "PUT attach_fail") \ - EM(fscache_obj_put_drop_obj, "PUT drop_obj") \ - EM(fscache_obj_put_enq_dep, "PUT enq_dep") \ - EM(fscache_obj_put_queue, "PUT queue") \ - EM(fscache_obj_put_work, "PUT work") \ - EM(cachefiles_obj_put_wait_retry, "PUT wait_retry") \ - E_(cachefiles_obj_put_wait_timeo, "PUT wait_timeo") - -/* - * Export enum symbols via userspace. - */ -#undef EM -#undef E_ -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define E_(a, b) TRACE_DEFINE_ENUM(a); - -cachefiles_obj_kill_traces; -cachefiles_obj_ref_traces; - -/* - * Now redefine the EM() and E_() macros to map the enums to the strings that - * will be printed in the output. - */ -#undef EM -#undef E_ -#define EM(a, b) { a, b }, -#define E_(a, b) { a, b } - - -TRACE_EVENT(cachefiles_ref, - TP_PROTO(struct cachefiles_object *obj, - struct fscache_cookie *cookie, - enum cachefiles_obj_ref_trace why, - int usage), - - TP_ARGS(obj, cookie, why, usage), - - /* Note that obj may be NULL */ - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, cookie ) - __field(enum cachefiles_obj_ref_trace, why ) - __field(int, usage ) - ), - - TP_fast_assign( - __entry->obj = obj->fscache.debug_id; - __entry->cookie = cookie->debug_id; - __entry->usage = usage; - __entry->why = why; - ), - - TP_printk("c=%08x o=%08x u=%d %s", - __entry->cookie, __entry->obj, __entry->usage, - __print_symbolic(__entry->why, cachefiles_obj_ref_traces)) - ); - -TRACE_EVENT(cachefiles_lookup, - TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, - struct inode *inode), - - TP_ARGS(obj, de, inode), - - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(struct dentry *, de ) - __field(struct inode *, inode ) - ), - - TP_fast_assign( - __entry->obj = obj->fscache.debug_id; - __entry->de = de; - __entry->inode = inode; - ), - - TP_printk("o=%08x d=%p i=%p", - __entry->obj, __entry->de, __entry->inode) - ); - -TRACE_EVENT(cachefiles_mkdir, - TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, int ret), - - TP_ARGS(obj, de, ret), - - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(struct dentry *, de ) - __field(int, ret ) - ), - - TP_fast_assign( - __entry->obj = obj->fscache.debug_id; - __entry->de = de; - __entry->ret = ret; - ), - - TP_printk("o=%08x d=%p r=%u", - __entry->obj, __entry->de, __entry->ret) - ); - -TRACE_EVENT(cachefiles_create, - TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, int ret), - - TP_ARGS(obj, de, ret), - - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(struct dentry *, de ) - __field(int, ret ) - ), - - TP_fast_assign( - __entry->obj = obj->fscache.debug_id; - __entry->de = de; - __entry->ret = ret; - ), - - TP_printk("o=%08x d=%p r=%u", - __entry->obj, __entry->de, __entry->ret) - ); - -TRACE_EVENT(cachefiles_unlink, - TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, - enum fscache_why_object_killed why), - - TP_ARGS(obj, de, why), - - /* Note that obj may be NULL */ - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(struct dentry *, de ) - __field(enum fscache_why_object_killed, why ) - ), - - TP_fast_assign( - __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; - __entry->de = de; - __entry->why = why; - ), - - TP_printk("o=%08x d=%p w=%s", - __entry->obj, __entry->de, - __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) - ); - -TRACE_EVENT(cachefiles_rename, - TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, - struct dentry *to, - enum fscache_why_object_killed why), - - TP_ARGS(obj, de, to, why), - - /* Note that obj may be NULL */ - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(struct dentry *, de ) - __field(struct dentry *, to ) - __field(enum fscache_why_object_killed, why ) - ), - - TP_fast_assign( - __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; - __entry->de = de; - __entry->to = to; - __entry->why = why; - ), - - TP_printk("o=%08x d=%p t=%p w=%s", - __entry->obj, __entry->de, __entry->to, - __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) - ); - -TRACE_EVENT(cachefiles_mark_active, - TP_PROTO(struct cachefiles_object *obj, - struct dentry *de), - - TP_ARGS(obj, de), - - /* Note that obj may be NULL */ - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(struct dentry *, de ) - ), - - TP_fast_assign( - __entry->obj = obj->fscache.debug_id; - __entry->de = de; - ), - - TP_printk("o=%08x d=%p", - __entry->obj, __entry->de) - ); - -TRACE_EVENT(cachefiles_wait_active, - TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, - struct cachefiles_object *xobj), - - TP_ARGS(obj, de, xobj), - - /* Note that obj may be NULL */ - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, xobj ) - __field(struct dentry *, de ) - __field(u16, flags ) - __field(u16, fsc_flags ) - ), - - TP_fast_assign( - __entry->obj = obj->fscache.debug_id; - __entry->de = de; - __entry->xobj = xobj->fscache.debug_id; - __entry->flags = xobj->flags; - __entry->fsc_flags = xobj->fscache.flags; - ), - - TP_printk("o=%08x d=%p wo=%08x wf=%x wff=%x", - __entry->obj, __entry->de, __entry->xobj, - __entry->flags, __entry->fsc_flags) - ); - -TRACE_EVENT(cachefiles_mark_inactive, - TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, - struct inode *inode), - - TP_ARGS(obj, de, inode), - - /* Note that obj may be NULL */ - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(struct dentry *, de ) - __field(struct inode *, inode ) - ), - - TP_fast_assign( - __entry->obj = obj->fscache.debug_id; - __entry->de = de; - __entry->inode = inode; - ), - - TP_printk("o=%08x d=%p i=%p", - __entry->obj, __entry->de, __entry->inode) - ); - -TRACE_EVENT(cachefiles_mark_buried, - TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, - enum fscache_why_object_killed why), - - TP_ARGS(obj, de, why), - - /* Note that obj may be NULL */ - TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(struct dentry *, de ) - __field(enum fscache_why_object_killed, why ) - ), - - TP_fast_assign( - __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; - __entry->de = de; - __entry->why = why; - ), - - TP_printk("o=%08x d=%p w=%s", - __entry->obj, __entry->de, - __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) - ); - -#endif /* _TRACE_CACHEFILES_H */ - -/* This part must be outside protection */ -#include -- cgit v1.2.3 From 2cee6fbb7f01bcb25f11ef1439e89a29de4c0c1d Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Oct 2021 21:53:44 +0100 Subject: fscache: Remove the contents of the fscache driver, pending rewrite Remove the code that comprises the fscache driver as it's going to be substantially rewritten, with the majority of the code being erased in the rewrite. A small piece of linux/fscache.h is left as that is #included by a bunch of network filesystems. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819578724.215744.18210619052245724238.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906884814.143852.6727245089843862889.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967077097.1823006.1377665951499979089.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021485548.640689.13876080567388696162.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 548 +------------------------- include/linux/fscache.h | 851 +---------------------------------------- include/trace/events/fscache.h | 523 ------------------------- 3 files changed, 3 insertions(+), 1919 deletions(-) delete mode 100644 include/trace/events/fscache.h (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 8d39491c5f9f..47f21a53ac4b 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* General filesystem caching backing cache interface * - * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * NOTE!!! See: @@ -15,551 +15,5 @@ #define _LINUX_FSCACHE_CACHE_H #include -#include -#include - -#define NR_MAXCACHES BITS_PER_LONG - -struct fscache_cache; -struct fscache_cache_ops; -struct fscache_object; -struct fscache_operation; - -enum fscache_obj_ref_trace { - fscache_obj_get_add_to_deps, - fscache_obj_get_queue, - fscache_obj_put_alloc_fail, - fscache_obj_put_attach_fail, - fscache_obj_put_drop_obj, - fscache_obj_put_enq_dep, - fscache_obj_put_queue, - fscache_obj_put_work, - fscache_obj_ref__nr_traces -}; - -/* - * cache tag definition - */ -struct fscache_cache_tag { - struct list_head link; - struct fscache_cache *cache; /* cache referred to by this tag */ - unsigned long flags; -#define FSCACHE_TAG_RESERVED 0 /* T if tag is reserved for a cache */ - atomic_t usage; - char name[]; /* tag name */ -}; - -/* - * cache definition - */ -struct fscache_cache { - const struct fscache_cache_ops *ops; - struct fscache_cache_tag *tag; /* tag representing this cache */ - struct kobject *kobj; /* system representation of this cache */ - struct list_head link; /* link in list of caches */ - size_t max_index_size; /* maximum size of index data */ - char identifier[36]; /* cache label */ - - /* node management */ - struct work_struct op_gc; /* operation garbage collector */ - struct list_head object_list; /* list of data/index objects */ - struct list_head op_gc_list; /* list of ops to be deleted */ - spinlock_t object_list_lock; - spinlock_t op_gc_list_lock; - atomic_t object_count; /* no. of live objects in this cache */ - struct fscache_object *fsdef; /* object for the fsdef index */ - unsigned long flags; -#define FSCACHE_IOERROR 0 /* cache stopped on I/O error */ -#define FSCACHE_CACHE_WITHDRAWN 1 /* cache has been withdrawn */ -}; - -extern wait_queue_head_t fscache_cache_cleared_wq; - -/* - * operation to be applied to a cache object - * - retrieval initiation operations are done in the context of the process - * that issued them, and not in an async thread pool - */ -typedef void (*fscache_operation_release_t)(struct fscache_operation *op); -typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); -typedef void (*fscache_operation_cancel_t)(struct fscache_operation *op); - -enum fscache_operation_state { - FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ - FSCACHE_OP_ST_INITIALISED, /* Op is initialised */ - FSCACHE_OP_ST_PENDING, /* Op is blocked from running */ - FSCACHE_OP_ST_IN_PROGRESS, /* Op is in progress */ - FSCACHE_OP_ST_COMPLETE, /* Op is complete */ - FSCACHE_OP_ST_CANCELLED, /* Op has been cancelled */ - FSCACHE_OP_ST_DEAD /* Op is now dead */ -}; - -struct fscache_operation { - struct work_struct work; /* record for async ops */ - struct list_head pend_link; /* link in object->pending_ops */ - struct fscache_object *object; /* object to be operated upon */ - - unsigned long flags; -#define FSCACHE_OP_TYPE 0x000f /* operation type */ -#define FSCACHE_OP_ASYNC 0x0001 /* - async op, processor may sleep for disk */ -#define FSCACHE_OP_MYTHREAD 0x0002 /* - processing is done be issuing thread, not pool */ -#define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ -#define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ -#define FSCACHE_OP_DEC_READ_CNT 6 /* decrement object->n_reads on destruction */ -#define FSCACHE_OP_UNUSE_COOKIE 7 /* call fscache_unuse_cookie() on completion */ -#define FSCACHE_OP_KEEP_FLAGS 0x00f0 /* flags to keep when repurposing an op */ - - enum fscache_operation_state state; - atomic_t usage; - unsigned debug_id; /* debugging ID */ - - /* operation processor callback - * - can be NULL if FSCACHE_OP_WAITING is going to be used to perform - * the op in a non-pool thread */ - fscache_operation_processor_t processor; - - /* Operation cancellation cleanup (optional) */ - fscache_operation_cancel_t cancel; - - /* operation releaser */ - fscache_operation_release_t release; -}; - -extern atomic_t fscache_op_debug_id; -extern void fscache_op_work_func(struct work_struct *work); - -extern void fscache_enqueue_operation(struct fscache_operation *); -extern void fscache_op_complete(struct fscache_operation *, bool); -extern void fscache_put_operation(struct fscache_operation *); -extern void fscache_operation_init(struct fscache_cookie *, - struct fscache_operation *, - fscache_operation_processor_t, - fscache_operation_cancel_t, - fscache_operation_release_t); - -/* - * data read operation - */ -struct fscache_retrieval { - struct fscache_operation op; - struct fscache_cookie *cookie; /* The netfs cookie */ - struct address_space *mapping; /* netfs pages */ - fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ - void *context; /* netfs read context (pinned) */ - struct list_head to_do; /* list of things to be done by the backend */ - atomic_t n_pages; /* number of pages to be retrieved */ -}; - -typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp); - -typedef int (*fscache_pages_retrieval_func_t)(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp); - -/** - * fscache_get_retrieval - Get an extra reference on a retrieval operation - * @op: The retrieval operation to get a reference on - * - * Get an extra reference on a retrieval operation. - */ -static inline -struct fscache_retrieval *fscache_get_retrieval(struct fscache_retrieval *op) -{ - atomic_inc(&op->op.usage); - return op; -} - -/** - * fscache_enqueue_retrieval - Enqueue a retrieval operation for processing - * @op: The retrieval operation affected - * - * Enqueue a retrieval operation for processing by the FS-Cache thread pool. - */ -static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) -{ - fscache_enqueue_operation(&op->op); -} - -/** - * fscache_retrieval_complete - Record (partial) completion of a retrieval - * @op: The retrieval operation affected - * @n_pages: The number of pages to account for - */ -static inline void fscache_retrieval_complete(struct fscache_retrieval *op, - int n_pages) -{ - if (atomic_sub_return_relaxed(n_pages, &op->n_pages) <= 0) - fscache_op_complete(&op->op, false); -} - -/** - * fscache_put_retrieval - Drop a reference to a retrieval operation - * @op: The retrieval operation affected - * - * Drop a reference to a retrieval operation. - */ -static inline void fscache_put_retrieval(struct fscache_retrieval *op) -{ - fscache_put_operation(&op->op); -} - -/* - * cached page storage work item - * - used to do three things: - * - batch writes to the cache - * - do cache writes asynchronously - * - defer writes until cache object lookup completion - */ -struct fscache_storage { - struct fscache_operation op; - pgoff_t store_limit; /* don't write more than this */ -}; - -/* - * cache operations - */ -struct fscache_cache_ops { - /* name of cache provider */ - const char *name; - - /* allocate an object record for a cookie */ - struct fscache_object *(*alloc_object)(struct fscache_cache *cache, - struct fscache_cookie *cookie); - - /* look up the object for a cookie - * - return -ETIMEDOUT to be requeued - */ - int (*lookup_object)(struct fscache_object *object); - - /* finished looking up */ - void (*lookup_complete)(struct fscache_object *object); - - /* increment the usage count on this object (may fail if unmounting) */ - struct fscache_object *(*grab_object)(struct fscache_object *object, - enum fscache_obj_ref_trace why); - - /* pin an object in the cache */ - int (*pin_object)(struct fscache_object *object); - - /* unpin an object in the cache */ - void (*unpin_object)(struct fscache_object *object); - - /* check the consistency between the backing cache and the FS-Cache - * cookie */ - int (*check_consistency)(struct fscache_operation *op); - - /* store the updated auxiliary data on an object */ - void (*update_object)(struct fscache_object *object); - - /* Invalidate an object */ - void (*invalidate_object)(struct fscache_operation *op); - - /* discard the resources pinned by an object and effect retirement if - * necessary */ - void (*drop_object)(struct fscache_object *object); - - /* dispose of a reference to an object */ - void (*put_object)(struct fscache_object *object, - enum fscache_obj_ref_trace why); - - /* sync a cache */ - void (*sync_cache)(struct fscache_cache *cache); - - /* notification that the attributes of a non-index object (such as - * i_size) have changed */ - int (*attr_changed)(struct fscache_object *object); - - /* reserve space for an object's data and associated metadata */ - int (*reserve_space)(struct fscache_object *object, loff_t i_size); - - /* request a backing block for a page be read or allocated in the - * cache */ - fscache_page_retrieval_func_t read_or_alloc_page; - - /* request backing blocks for a list of pages be read or allocated in - * the cache */ - fscache_pages_retrieval_func_t read_or_alloc_pages; - - /* request a backing block for a page be allocated in the cache so that - * it can be written directly */ - fscache_page_retrieval_func_t allocate_page; - - /* request backing blocks for pages be allocated in the cache so that - * they can be written directly */ - fscache_pages_retrieval_func_t allocate_pages; - - /* write a page to its backing block in the cache */ - int (*write_page)(struct fscache_storage *op, struct page *page); - - /* detach backing block from a page (optional) - * - must release the cookie lock before returning - * - may sleep - */ - void (*uncache_page)(struct fscache_object *object, - struct page *page); - - /* dissociate a cache from all the pages it was backing */ - void (*dissociate_pages)(struct fscache_cache *cache); - - /* Begin a read operation for the netfs lib */ - int (*begin_read_operation)(struct netfs_read_request *rreq, - struct fscache_retrieval *op); -}; - -extern struct fscache_cookie fscache_fsdef_index; - -/* - * Event list for fscache_object::{event_mask,events} - */ -enum { - FSCACHE_OBJECT_EV_NEW_CHILD, /* T if object has a new child */ - FSCACHE_OBJECT_EV_PARENT_READY, /* T if object's parent is ready */ - FSCACHE_OBJECT_EV_UPDATE, /* T if object should be updated */ - FSCACHE_OBJECT_EV_INVALIDATE, /* T if cache requested object invalidation */ - FSCACHE_OBJECT_EV_CLEARED, /* T if accessors all gone */ - FSCACHE_OBJECT_EV_ERROR, /* T if fatal error occurred during processing */ - FSCACHE_OBJECT_EV_KILL, /* T if netfs relinquished or cache withdrew object */ - NR_FSCACHE_OBJECT_EVENTS -}; - -#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1) - -/* - * States for object state machine. - */ -struct fscache_transition { - unsigned long events; - const struct fscache_state *transit_to; -}; - -struct fscache_state { - char name[24]; - char short_name[8]; - const struct fscache_state *(*work)(struct fscache_object *object, - int event); - const struct fscache_transition transitions[]; -}; - -/* - * on-disk cache file or index handle - */ -struct fscache_object { - const struct fscache_state *state; /* Object state machine state */ - const struct fscache_transition *oob_table; /* OOB state transition table */ - int debug_id; /* debugging ID */ - int n_children; /* number of child objects */ - int n_ops; /* number of extant ops on object */ - int n_obj_ops; /* number of object ops outstanding on object */ - int n_in_progress; /* number of ops in progress */ - int n_exclusive; /* number of exclusive ops queued or in progress */ - atomic_t n_reads; /* number of read ops in progress */ - spinlock_t lock; /* state and operations lock */ - - unsigned long lookup_jif; /* time at which lookup started */ - unsigned long oob_event_mask; /* OOB events this object is interested in */ - unsigned long event_mask; /* events this object is interested in */ - unsigned long events; /* events to be processed by this object - * (order is important - using fls) */ - - unsigned long flags; -#define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ -#define FSCACHE_OBJECT_PENDING_WRITE 1 /* T if object has pending write */ -#define FSCACHE_OBJECT_WAITING 2 /* T if object is waiting on its parent */ -#define FSCACHE_OBJECT_IS_LIVE 3 /* T if object is not withdrawn or relinquished */ -#define FSCACHE_OBJECT_IS_LOOKED_UP 4 /* T if object has been looked up */ -#define FSCACHE_OBJECT_IS_AVAILABLE 5 /* T if object has become active */ -#define FSCACHE_OBJECT_RETIRED 6 /* T if object was retired on relinquishment */ -#define FSCACHE_OBJECT_KILLED_BY_CACHE 7 /* T if object was killed by the cache */ -#define FSCACHE_OBJECT_RUN_AFTER_DEAD 8 /* T if object has been dispatched after death */ - - struct list_head cache_link; /* link in cache->object_list */ - struct hlist_node cookie_link; /* link in cookie->backing_objects */ - struct fscache_cache *cache; /* cache that supplied this object */ - struct fscache_cookie *cookie; /* netfs's file/index object */ - struct fscache_object *parent; /* parent object */ - struct work_struct work; /* attention scheduling record */ - struct list_head dependents; /* FIFO of dependent objects */ - struct list_head dep_link; /* link in parent's dependents list */ - struct list_head pending_ops; /* unstarted operations on this object */ - pgoff_t store_limit; /* current storage limit */ - loff_t store_limit_l; /* current storage limit */ -}; - -extern void fscache_object_init(struct fscache_object *, struct fscache_cookie *, - struct fscache_cache *); -extern void fscache_object_destroy(struct fscache_object *); - -extern void fscache_object_lookup_negative(struct fscache_object *object); -extern void fscache_obtained_object(struct fscache_object *object); - -static inline bool fscache_object_is_live(struct fscache_object *object) -{ - return test_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); -} - -static inline bool fscache_object_is_dying(struct fscache_object *object) -{ - return !fscache_object_is_live(object); -} - -static inline bool fscache_object_is_available(struct fscache_object *object) -{ - return test_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags); -} - -static inline bool fscache_cache_is_broken(struct fscache_object *object) -{ - return test_bit(FSCACHE_IOERROR, &object->cache->flags); -} - -static inline bool fscache_object_is_active(struct fscache_object *object) -{ - return fscache_object_is_available(object) && - fscache_object_is_live(object) && - !fscache_cache_is_broken(object); -} - -/** - * fscache_object_destroyed - Note destruction of an object in a cache - * @cache: The cache from which the object came - * - * Note the destruction and deallocation of an object record in a cache. - */ -static inline void fscache_object_destroyed(struct fscache_cache *cache) -{ - if (atomic_dec_and_test(&cache->object_count)) - wake_up_all(&fscache_cache_cleared_wq); -} - -/** - * fscache_object_lookup_error - Note an object encountered an error - * @object: The object on which the error was encountered - * - * Note that an object encountered a fatal error (usually an I/O error) and - * that it should be withdrawn as soon as possible. - */ -static inline void fscache_object_lookup_error(struct fscache_object *object) -{ - set_bit(FSCACHE_OBJECT_EV_ERROR, &object->events); -} - -/** - * fscache_set_store_limit - Set the maximum size to be stored in an object - * @object: The object to set the maximum on - * @i_size: The limit to set in bytes - * - * Set the maximum size an object is permitted to reach, implying the highest - * byte that may be written. Intended to be called by the attr_changed() op. - * - * See Documentation/filesystems/caching/backend-api.rst for a complete - * description. - */ -static inline -void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) -{ - object->store_limit_l = i_size; - object->store_limit = i_size >> PAGE_SHIFT; - if (i_size & ~PAGE_MASK) - object->store_limit++; -} - -/** - * fscache_end_io - End a retrieval operation on a page - * @op: The FS-Cache operation covering the retrieval - * @page: The page that was to be fetched - * @error: The error code (0 if successful) - * - * Note the end of an operation to retrieve a page, as covered by a particular - * operation record. - */ -static inline void fscache_end_io(struct fscache_retrieval *op, - struct page *page, int error) -{ - op->end_io_func(page, op->context, error); -} - -static inline void __fscache_use_cookie(struct fscache_cookie *cookie) -{ - atomic_inc(&cookie->n_active); -} - -/** - * fscache_use_cookie - Request usage of cookie attached to an object - * @object: Object description - * - * Request usage of the cookie attached to an object. NULL is returned if the - * relinquishment had reduced the cookie usage count to 0. - */ -static inline bool fscache_use_cookie(struct fscache_object *object) -{ - struct fscache_cookie *cookie = object->cookie; - return atomic_inc_not_zero(&cookie->n_active) != 0; -} - -static inline bool __fscache_unuse_cookie(struct fscache_cookie *cookie) -{ - return atomic_dec_and_test(&cookie->n_active); -} - -static inline void __fscache_wake_unused_cookie(struct fscache_cookie *cookie) -{ - wake_up_var(&cookie->n_active); -} - -/** - * fscache_unuse_cookie - Cease usage of cookie attached to an object - * @object: Object description - * - * Cease usage of the cookie attached to an object. When the users count - * reaches zero then the cookie relinquishment will be permitted to proceed. - */ -static inline void fscache_unuse_cookie(struct fscache_object *object) -{ - struct fscache_cookie *cookie = object->cookie; - if (__fscache_unuse_cookie(cookie)) - __fscache_wake_unused_cookie(cookie); -} - -/* - * out-of-line cache backend functions - */ -extern __printf(3, 4) -void fscache_init_cache(struct fscache_cache *cache, - const struct fscache_cache_ops *ops, - const char *idfmt, ...); - -extern int fscache_add_cache(struct fscache_cache *cache, - struct fscache_object *fsdef, - const char *tagname); -extern void fscache_withdraw_cache(struct fscache_cache *cache); - -extern void fscache_io_error(struct fscache_cache *cache); - -extern void fscache_mark_page_cached(struct fscache_retrieval *op, - struct page *page); - -extern void fscache_mark_pages_cached(struct fscache_retrieval *op, - struct pagevec *pagevec); - -extern bool fscache_object_sleep_till_congested(signed long *timeoutp); - -extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, - const void *data, - uint16_t datalen, - loff_t object_size); - -extern void fscache_object_retrying_stale(struct fscache_object *object); - -enum fscache_why_object_killed { - FSCACHE_OBJECT_IS_STALE, - FSCACHE_OBJECT_NO_SPACE, - FSCACHE_OBJECT_WAS_RETIRED, - FSCACHE_OBJECT_WAS_CULLED, -}; -extern void fscache_object_mark_killed(struct fscache_object *object, - enum fscache_why_object_killed why); #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 3b2282c157f7..0364a4ca16f6 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -15,861 +15,14 @@ #define _LINUX_FSCACHE_H #include -#include -#include -#include -#include #include #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) -#define fscache_available() (1) #define fscache_cookie_valid(cookie) (cookie) +#define fscache_cookie_enabled(cookie) (cookie) #else -#define fscache_available() (0) #define fscache_cookie_valid(cookie) (0) +#define fscache_cookie_enabled(cookie) (0) #endif - -/* pattern used to fill dead space in an index entry */ -#define FSCACHE_INDEX_DEADFILL_PATTERN 0x79 - -struct pagevec; -struct fscache_cache_tag; -struct fscache_cookie; -struct fscache_netfs; -struct netfs_read_request; - -typedef void (*fscache_rw_complete_t)(struct page *page, - void *context, - int error); - -/* result of index entry consultation */ -enum fscache_checkaux { - FSCACHE_CHECKAUX_OKAY, /* entry okay as is */ - FSCACHE_CHECKAUX_NEEDS_UPDATE, /* entry requires update */ - FSCACHE_CHECKAUX_OBSOLETE, /* entry requires deletion */ -}; - -/* - * fscache cookie definition - */ -struct fscache_cookie_def { - /* name of cookie type */ - char name[16]; - - /* cookie type */ - uint8_t type; -#define FSCACHE_COOKIE_TYPE_INDEX 0 -#define FSCACHE_COOKIE_TYPE_DATAFILE 1 - - /* select the cache into which to insert an entry in this index - * - optional - * - should return a cache identifier or NULL to cause the cache to be - * inherited from the parent if possible or the first cache picked - * for a non-index file if not - */ - struct fscache_cache_tag *(*select_cache)( - const void *parent_netfs_data, - const void *cookie_netfs_data); - - /* consult the netfs about the state of an object - * - this function can be absent if the index carries no state data - * - the netfs data from the cookie being used as the target is - * presented, as is the auxiliary data and the object size - */ - enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, - const void *data, - uint16_t datalen, - loff_t object_size); - - /* get an extra reference on a read context - * - this function can be absent if the completion function doesn't - * require a context - */ - void (*get_context)(void *cookie_netfs_data, void *context); - - /* release an extra reference on a read context - * - this function can be absent if the completion function doesn't - * require a context - */ - void (*put_context)(void *cookie_netfs_data, void *context); - - /* indicate page that now have cache metadata retained - * - this function should mark the specified page as now being cached - * - the page will have been marked with PG_fscache before this is - * called, so this is optional - */ - void (*mark_page_cached)(void *cookie_netfs_data, - struct address_space *mapping, - struct page *page); -}; - -/* - * fscache cached network filesystem type - * - name, version and ops must be filled in before registration - * - all other fields will be set during registration - */ -struct fscache_netfs { - uint32_t version; /* indexing version */ - const char *name; /* filesystem name */ - struct fscache_cookie *primary_index; -}; - -/* - * data file or index object cookie - * - a file will only appear in one cache - * - a request to cache a file may or may not be honoured, subject to - * constraints such as disk space - * - indices are created on disk just-in-time - */ -struct fscache_cookie { - refcount_t ref; /* number of users of this cookie */ - atomic_t n_children; /* number of children of this cookie */ - atomic_t n_active; /* number of active users of netfs ptrs */ - unsigned int debug_id; - spinlock_t lock; - spinlock_t stores_lock; /* lock on page store tree */ - struct hlist_head backing_objects; /* object(s) backing this file/index */ - const struct fscache_cookie_def *def; /* definition */ - struct fscache_cookie *parent; /* parent of this entry */ - struct hlist_bl_node hash_link; /* Link in hash table */ - struct list_head proc_link; /* Link in proc list */ - void *netfs_data; /* back pointer to netfs */ - struct radix_tree_root stores; /* pages to be stored on this cookie */ -#define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ -#define FSCACHE_COOKIE_STORING_TAG 1 /* pages tag: writing to cache */ - - unsigned long flags; -#define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ -#define FSCACHE_COOKIE_NO_DATA_YET 1 /* T if new object with no cached data yet */ -#define FSCACHE_COOKIE_UNAVAILABLE 2 /* T if cookie is unavailable (error, etc) */ -#define FSCACHE_COOKIE_INVALIDATING 3 /* T if cookie is being invalidated */ -#define FSCACHE_COOKIE_RELINQUISHED 4 /* T if cookie has been relinquished */ -#define FSCACHE_COOKIE_ENABLED 5 /* T if cookie is enabled */ -#define FSCACHE_COOKIE_ENABLEMENT_LOCK 6 /* T if cookie is being en/disabled */ -#define FSCACHE_COOKIE_AUX_UPDATED 8 /* T if the auxiliary data was updated */ -#define FSCACHE_COOKIE_ACQUIRED 9 /* T if cookie is in use */ -#define FSCACHE_COOKIE_RELINQUISHING 10 /* T if cookie is being relinquished */ - - u8 type; /* Type of object */ - u8 key_len; /* Length of index key */ - u8 aux_len; /* Length of auxiliary data */ - u32 key_hash; /* Hash of parent, type, key, len */ - union { - void *key; /* Index key */ - u8 inline_key[16]; /* - If the key is short enough */ - }; - union { - void *aux; /* Auxiliary data */ - u8 inline_aux[8]; /* - If the aux data is short enough */ - }; -}; - -static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie) -{ - return fscache_cookie_valid(cookie) && test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); -} - -/* - * slow-path functions for when there is actually caching available, and the - * netfs does actually have a valid token - * - these are not to be called directly - * - these are undefined symbols when FS-Cache is not configured and the - * optimiser takes care of not using them - */ -extern int __fscache_register_netfs(struct fscache_netfs *); -extern void __fscache_unregister_netfs(struct fscache_netfs *); -extern struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *); -extern void __fscache_release_cache_tag(struct fscache_cache_tag *); - -extern struct fscache_cookie *__fscache_acquire_cookie( - struct fscache_cookie *, - const struct fscache_cookie_def *, - const void *, size_t, - const void *, size_t, - void *, loff_t, bool); -extern void __fscache_relinquish_cookie(struct fscache_cookie *, const void *, bool); -extern int __fscache_check_consistency(struct fscache_cookie *, const void *); -extern void __fscache_update_cookie(struct fscache_cookie *, const void *); -extern int __fscache_attr_changed(struct fscache_cookie *); -extern void __fscache_invalidate(struct fscache_cookie *); -extern void __fscache_wait_on_invalidate(struct fscache_cookie *); - -#ifdef FSCACHE_USE_NEW_IO_API -extern int __fscache_begin_read_operation(struct netfs_read_request *, struct fscache_cookie *); -#else -extern int __fscache_read_or_alloc_page(struct fscache_cookie *, - struct page *, - fscache_rw_complete_t, - void *, - gfp_t); -extern int __fscache_read_or_alloc_pages(struct fscache_cookie *, - struct address_space *, - struct list_head *, - unsigned *, - fscache_rw_complete_t, - void *, - gfp_t); -extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t); -extern int __fscache_write_page(struct fscache_cookie *, struct page *, loff_t, gfp_t); -extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); -extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); -extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); -extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *, - gfp_t); -extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *, - struct inode *); -extern void __fscache_readpages_cancel(struct fscache_cookie *cookie, - struct list_head *pages); -#endif /* FSCACHE_USE_NEW_IO_API */ - -extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool); -extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t, - bool (*)(void *), void *); - -/** - * fscache_register_netfs - Register a filesystem as desiring caching services - * @netfs: The description of the filesystem - * - * Register a filesystem as desiring caching services if they're available. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_register_netfs(struct fscache_netfs *netfs) -{ - if (fscache_available()) - return __fscache_register_netfs(netfs); - else - return 0; -} - -/** - * fscache_unregister_netfs - Indicate that a filesystem no longer desires - * caching services - * @netfs: The description of the filesystem - * - * Indicate that a filesystem no longer desires caching services for the - * moment. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_unregister_netfs(struct fscache_netfs *netfs) -{ - if (fscache_available()) - __fscache_unregister_netfs(netfs); -} - -/** - * fscache_lookup_cache_tag - Look up a cache tag - * @name: The name of the tag to search for - * - * Acquire a specific cache referral tag that can be used to select a specific - * cache in which to cache an index. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) -{ - if (fscache_available()) - return __fscache_lookup_cache_tag(name); - else - return NULL; -} - -/** - * fscache_release_cache_tag - Release a cache tag - * @tag: The tag to release - * - * Release a reference to a cache referral tag previously looked up. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_release_cache_tag(struct fscache_cache_tag *tag) -{ - if (fscache_available()) - __fscache_release_cache_tag(tag); -} - -/** - * fscache_acquire_cookie - Acquire a cookie to represent a cache object - * @parent: The cookie that's to be the parent of this one - * @def: A description of the cache object, including callback operations - * @index_key: The index key for this cookie - * @index_key_len: Size of the index key - * @aux_data: The auxiliary data for the cookie (may be NULL) - * @aux_data_len: Size of the auxiliary data buffer - * @netfs_data: An arbitrary piece of data to be kept in the cookie to - * represent the cache object to the netfs - * @object_size: The initial size of object - * @enable: Whether or not to enable a data cookie immediately - * - * This function is used to inform FS-Cache about part of an index hierarchy - * that can be used to locate files. This is done by requesting a cookie for - * each index in the path to the file. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -struct fscache_cookie *fscache_acquire_cookie( - struct fscache_cookie *parent, - const struct fscache_cookie_def *def, - const void *index_key, - size_t index_key_len, - const void *aux_data, - size_t aux_data_len, - void *netfs_data, - loff_t object_size, - bool enable) -{ - if (fscache_cookie_valid(parent) && fscache_cookie_enabled(parent)) - return __fscache_acquire_cookie(parent, def, - index_key, index_key_len, - aux_data, aux_data_len, - netfs_data, object_size, enable); - else - return NULL; -} - -/** - * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding - * it - * @cookie: The cookie being returned - * @aux_data: The updated auxiliary data for the cookie (may be NULL) - * @retire: True if the cache object the cookie represents is to be discarded - * - * This function returns a cookie to the cache, forcibly discarding the - * associated cache object if retire is set to true. The opportunity is - * provided to update the auxiliary data in the cache before the object is - * disconnected. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_relinquish_cookie(struct fscache_cookie *cookie, - const void *aux_data, - bool retire) -{ - if (fscache_cookie_valid(cookie)) - __fscache_relinquish_cookie(cookie, aux_data, retire); -} - -/** - * fscache_check_consistency - Request validation of a cache's auxiliary data - * @cookie: The cookie representing the cache object - * @aux_data: The updated auxiliary data for the cookie (may be NULL) - * - * Request an consistency check from fscache, which passes the request to the - * backing cache. The auxiliary data on the cookie will be updated first if - * @aux_data is set. - * - * Returns 0 if consistent and -ESTALE if inconsistent. May also - * return -ENOMEM and -ERESTARTSYS. - */ -static inline -int fscache_check_consistency(struct fscache_cookie *cookie, - const void *aux_data) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_check_consistency(cookie, aux_data); - else - return 0; -} - -/** - * fscache_update_cookie - Request that a cache object be updated - * @cookie: The cookie representing the cache object - * @aux_data: The updated auxiliary data for the cookie (may be NULL) - * - * Request an update of the index data for the cache object associated with the - * cookie. The auxiliary data on the cookie will be updated first if @aux_data - * is set. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - __fscache_update_cookie(cookie, aux_data); -} - -/** - * fscache_pin_cookie - Pin a data-storage cache object in its cache - * @cookie: The cookie representing the cache object - * - * Permit data-storage cache objects to be pinned in the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_pin_cookie(struct fscache_cookie *cookie) -{ - return -ENOBUFS; -} - -/** - * fscache_pin_cookie - Unpin a data-storage cache object in its cache - * @cookie: The cookie representing the cache object - * - * Permit data-storage cache objects to be unpinned from the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_unpin_cookie(struct fscache_cookie *cookie) -{ -} - -/** - * fscache_attr_changed - Notify cache that an object's attributes changed - * @cookie: The cookie representing the cache object - * - * Send a notification to the cache indicating that an object's attributes have - * changed. This includes the data size. These attributes will be obtained - * through the get_attr() cookie definition op. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_attr_changed(struct fscache_cookie *cookie) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_attr_changed(cookie); - else - return -ENOBUFS; -} - -/** - * fscache_invalidate - Notify cache that an object needs invalidation - * @cookie: The cookie representing the cache object - * - * Notify the cache that an object is needs to be invalidated and that it - * should abort any retrievals or stores it is doing on the cache. The object - * is then marked non-caching until such time as the invalidation is complete. - * - * This can be called with spinlocks held. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_invalidate(struct fscache_cookie *cookie) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - __fscache_invalidate(cookie); -} - -/** - * fscache_wait_on_invalidate - Wait for invalidation to complete - * @cookie: The cookie representing the cache object - * - * Wait for the invalidation of an object to complete. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_wait_on_invalidate(struct fscache_cookie *cookie) -{ - if (fscache_cookie_valid(cookie)) - __fscache_wait_on_invalidate(cookie); -} - -/** - * fscache_reserve_space - Reserve data space for a cached object - * @cookie: The cookie representing the cache object - * @i_size: The amount of space to be reserved - * - * Reserve an amount of space in the cache for the cache object attached to a - * cookie so that a write to that object within the space can always be - * honoured. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) -{ - return -ENOBUFS; -} - -#ifdef FSCACHE_USE_NEW_IO_API - -/** - * fscache_begin_read_operation - Begin a read operation for the netfs lib - * @rreq: The read request being undertaken - * @cookie: The cookie representing the cache object - * - * Begin a read operation on behalf of the netfs helper library. @rreq - * indicates the read request to which the operation state should be attached; - * @cookie indicates the cache object that will be accessed. - * - * This is intended to be called from the ->begin_cache_operation() netfs lib - * operation as implemented by the network filesystem. - * - * Returns: - * * 0 - Success - * * -ENOBUFS - No caching available - * * Other error code from the cache, such as -ENOMEM. - */ -static inline -int fscache_begin_read_operation(struct netfs_read_request *rreq, - struct fscache_cookie *cookie) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_begin_read_operation(rreq, cookie); - return -ENOBUFS; -} - -#else /* FSCACHE_USE_NEW_IO_API */ - -/** - * fscache_read_or_alloc_page - Read a page from the cache or allocate a block - * in which to store it - * @cookie: The cookie representing the cache object - * @page: The netfs page to fill if possible - * @end_io_func: The callback to invoke when and if the page is filled - * @context: An arbitrary piece of data to pass on to end_io_func() - * @gfp: The conditions under which memory allocation should be made - * - * Read a page from the cache, or if that's not possible make a potential - * one-block reservation in the cache into which the page may be stored once - * fetched from the server. - * - * If the page is not backed by the cache object, or if it there's some reason - * it can't be, -ENOBUFS will be returned and nothing more will be done for - * that page. - * - * Else, if that page is backed by the cache, a read will be initiated directly - * to the netfs's page and 0 will be returned by this function. The - * end_io_func() callback will be invoked when the operation terminates on a - * completion or failure. Note that the callback may be invoked before the - * return. - * - * Else, if the page is unbacked, -ENODATA is returned and a block may have - * been allocated in the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_read_or_alloc_page(struct fscache_cookie *cookie, - struct page *page, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_read_or_alloc_page(cookie, page, end_io_func, - context, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_read_or_alloc_pages - Read pages from the cache and/or allocate - * blocks in which to store them - * @cookie: The cookie representing the cache object - * @mapping: The netfs inode mapping to which the pages will be attached - * @pages: A list of potential netfs pages to be filled - * @nr_pages: Number of pages to be read and/or allocated - * @end_io_func: The callback to invoke when and if each page is filled - * @context: An arbitrary piece of data to pass on to end_io_func() - * @gfp: The conditions under which memory allocation should be made - * - * Read a set of pages from the cache, or if that's not possible, attempt to - * make a potential one-block reservation for each page in the cache into which - * that page may be stored once fetched from the server. - * - * If some pages are not backed by the cache object, or if it there's some - * reason they can't be, -ENOBUFS will be returned and nothing more will be - * done for that pages. - * - * Else, if some of the pages are backed by the cache, a read will be initiated - * directly to the netfs's page and 0 will be returned by this function. The - * end_io_func() callback will be invoked when the operation terminates on a - * completion or failure. Note that the callback may be invoked before the - * return. - * - * Else, if a page is unbacked, -ENODATA is returned and a block may have - * been allocated in the cache. - * - * Because the function may want to return all of -ENOBUFS, -ENODATA and 0 in - * regard to different pages, the return values are prioritised in that order. - * Any pages submitted for reading are removed from the pages list. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_read_or_alloc_pages(cookie, mapping, pages, - nr_pages, end_io_func, - context, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_alloc_page - Allocate a block in which to store a page - * @cookie: The cookie representing the cache object - * @page: The netfs page to allocate a page for - * @gfp: The conditions under which memory allocation should be made - * - * Request Allocation a block in the cache in which to store a netfs page - * without retrieving any contents from the cache. - * - * If the page is not backed by a file then -ENOBUFS will be returned and - * nothing more will be done, and no reservation will be made. - * - * Else, a block will be allocated if one wasn't already, and 0 will be - * returned - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_alloc_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_alloc_page(cookie, page, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_readpages_cancel - Cancel read/alloc on pages - * @cookie: The cookie representing the inode's cache object. - * @pages: The netfs pages that we canceled write on in readpages() - * - * Uncache/unreserve the pages reserved earlier in readpages() via - * fscache_readpages_or_alloc() and similar. In most successful caches in - * readpages() this doesn't do anything. In cases when the underlying netfs's - * readahead failed we need to clean up the pagelist (unmark and uncache). - * - * This function may sleep as it may have to clean up disk state. - */ -static inline -void fscache_readpages_cancel(struct fscache_cookie *cookie, - struct list_head *pages) -{ - if (fscache_cookie_valid(cookie)) - __fscache_readpages_cancel(cookie, pages); -} - -/** - * fscache_write_page - Request storage of a page in the cache - * @cookie: The cookie representing the cache object - * @page: The netfs page to store - * @object_size: Updated size of object - * @gfp: The conditions under which memory allocation should be made - * - * Request the contents of the netfs page be written into the cache. This - * request may be ignored if no cache block is currently allocated, in which - * case it will return -ENOBUFS. - * - * If a cache block was already allocated, a write will be initiated and 0 will - * be returned. The PG_fscache_write page bit is set immediately and will then - * be cleared at the completion of the write to indicate the success or failure - * of the operation. Note that the completion may happen before the return. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_write_page(struct fscache_cookie *cookie, - struct page *page, - loff_t object_size, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_write_page(cookie, page, object_size, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_uncache_page - Indicate that caching is no longer required on a page - * @cookie: The cookie representing the cache object - * @page: The netfs page that was being cached. - * - * Tell the cache that we no longer want a page to be cached and that it should - * remove any knowledge of the netfs page it may have. - * - * Note that this cannot cancel any outstanding I/O operations between this - * page and the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_uncache_page(struct fscache_cookie *cookie, - struct page *page) -{ - if (fscache_cookie_valid(cookie)) - __fscache_uncache_page(cookie, page); -} - -/** - * fscache_check_page_write - Ask if a page is being writing to the cache - * @cookie: The cookie representing the cache object - * @page: The netfs page that is being cached. - * - * Ask the cache if a page is being written to the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -bool fscache_check_page_write(struct fscache_cookie *cookie, - struct page *page) -{ - if (fscache_cookie_valid(cookie)) - return __fscache_check_page_write(cookie, page); - return false; -} - -/** - * fscache_wait_on_page_write - Wait for a page to complete writing to the cache - * @cookie: The cookie representing the cache object - * @page: The netfs page that is being cached. - * - * Ask the cache to wake us up when a page is no longer being written to the - * cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_wait_on_page_write(struct fscache_cookie *cookie, - struct page *page) -{ - if (fscache_cookie_valid(cookie)) - __fscache_wait_on_page_write(cookie, page); -} - -/** - * fscache_maybe_release_page - Consider releasing a page, cancelling a store - * @cookie: The cookie representing the cache object - * @page: The netfs page that is being cached. - * @gfp: The gfp flags passed to releasepage() - * - * Consider releasing a page for the vmscan algorithm, on behalf of the netfs's - * releasepage() call. A storage request on the page may cancelled if it is - * not currently being processed. - * - * The function returns true if the page no longer has a storage request on it, - * and false if a storage request is left in place. If true is returned, the - * page will have been passed to fscache_uncache_page(). If false is returned - * the page cannot be freed yet. - */ -static inline -bool fscache_maybe_release_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && PageFsCache(page)) - return __fscache_maybe_release_page(cookie, page, gfp); - return true; -} - -/** - * fscache_uncache_all_inode_pages - Uncache all an inode's pages - * @cookie: The cookie representing the inode's cache object. - * @inode: The inode to uncache pages from. - * - * Uncache all the pages in an inode that are marked PG_fscache, assuming them - * to be associated with the given cookie. - * - * This function may sleep. It will wait for pages that are being written out - * and will wait whilst the PG_fscache mark is removed by the cache. - */ -static inline -void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, - struct inode *inode) -{ - if (fscache_cookie_valid(cookie)) - __fscache_uncache_all_inode_pages(cookie, inode); -} - -#endif /* FSCACHE_USE_NEW_IO_API */ - -/** - * fscache_disable_cookie - Disable a cookie - * @cookie: The cookie representing the cache object - * @aux_data: The updated auxiliary data for the cookie (may be NULL) - * @invalidate: Invalidate the backing object - * - * Disable a cookie from accepting further alloc, read, write, invalidate, - * update or acquire operations. Outstanding operations can still be waited - * upon and pages can still be uncached and the cookie relinquished. - * - * This will not return until all outstanding operations have completed. - * - * If @invalidate is set, then the backing object will be invalidated and - * detached, otherwise it will just be detached. - * - * If @aux_data is set, then auxiliary data will be updated from that. - */ -static inline -void fscache_disable_cookie(struct fscache_cookie *cookie, - const void *aux_data, - bool invalidate) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - __fscache_disable_cookie(cookie, aux_data, invalidate); -} - -/** - * fscache_enable_cookie - Reenable a cookie - * @cookie: The cookie representing the cache object - * @aux_data: The updated auxiliary data for the cookie (may be NULL) - * @object_size: Current size of object - * @can_enable: A function to permit enablement once lock is held - * @data: Data for can_enable() - * - * Reenable a previously disabled cookie, allowing it to accept further alloc, - * read, write, invalidate, update or acquire operations. An attempt will be - * made to immediately reattach the cookie to a backing object. If @aux_data - * is set, the auxiliary data attached to the cookie will be updated. - * - * The can_enable() function is called (if not NULL) once the enablement lock - * is held to rule on whether enablement is still permitted to go ahead. - */ -static inline -void fscache_enable_cookie(struct fscache_cookie *cookie, - const void *aux_data, - loff_t object_size, - bool (*can_enable)(void *data), - void *data) -{ - if (fscache_cookie_valid(cookie) && !fscache_cookie_enabled(cookie)) - __fscache_enable_cookie(cookie, aux_data, object_size, - can_enable, data); -} - #endif /* _LINUX_FSCACHE_H */ diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h deleted file mode 100644 index 446392f5ba83..000000000000 --- a/include/trace/events/fscache.h +++ /dev/null @@ -1,523 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* FS-Cache tracepoints - * - * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM fscache - -#if !defined(_TRACE_FSCACHE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_FSCACHE_H - -#include -#include - -/* - * Define enums for tracing information. - */ -#ifndef __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY -#define __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY - -enum fscache_cookie_trace { - fscache_cookie_collision, - fscache_cookie_discard, - fscache_cookie_get_acquire_parent, - fscache_cookie_get_attach_object, - fscache_cookie_get_reacquire, - fscache_cookie_get_register_netfs, - fscache_cookie_put_acquire_nobufs, - fscache_cookie_put_dup_netfs, - fscache_cookie_put_relinquish, - fscache_cookie_put_object, - fscache_cookie_put_parent, -}; - -enum fscache_page_trace { - fscache_page_cached, - fscache_page_inval, - fscache_page_maybe_release, - fscache_page_radix_clear_store, - fscache_page_radix_delete, - fscache_page_radix_insert, - fscache_page_radix_pend2store, - fscache_page_radix_set_pend, - fscache_page_uncache, - fscache_page_write, - fscache_page_write_end, - fscache_page_write_end_pend, - fscache_page_write_end_noc, - fscache_page_write_wait, - fscache_page_trace__nr -}; - -enum fscache_op_trace { - fscache_op_cancel, - fscache_op_cancel_all, - fscache_op_cancelled, - fscache_op_completed, - fscache_op_enqueue_async, - fscache_op_enqueue_mythread, - fscache_op_gc, - fscache_op_init, - fscache_op_put, - fscache_op_run, - fscache_op_signal, - fscache_op_submit, - fscache_op_submit_ex, - fscache_op_work, - fscache_op_trace__nr -}; - -enum fscache_page_op_trace { - fscache_page_op_alloc_one, - fscache_page_op_attr_changed, - fscache_page_op_check_consistency, - fscache_page_op_invalidate, - fscache_page_op_retr_multi, - fscache_page_op_retr_one, - fscache_page_op_write_one, - fscache_page_op_trace__nr -}; - -#endif - -/* - * Declare tracing information enums and their string mappings for display. - */ -#define fscache_cookie_traces \ - EM(fscache_cookie_collision, "*COLLISION*") \ - EM(fscache_cookie_discard, "DISCARD") \ - EM(fscache_cookie_get_acquire_parent, "GET prn") \ - EM(fscache_cookie_get_attach_object, "GET obj") \ - EM(fscache_cookie_get_reacquire, "GET raq") \ - EM(fscache_cookie_get_register_netfs, "GET net") \ - EM(fscache_cookie_put_acquire_nobufs, "PUT nbf") \ - EM(fscache_cookie_put_dup_netfs, "PUT dnt") \ - EM(fscache_cookie_put_relinquish, "PUT rlq") \ - EM(fscache_cookie_put_object, "PUT obj") \ - E_(fscache_cookie_put_parent, "PUT prn") - -#define fscache_page_traces \ - EM(fscache_page_cached, "Cached ") \ - EM(fscache_page_inval, "InvalPg") \ - EM(fscache_page_maybe_release, "MayRels") \ - EM(fscache_page_uncache, "Uncache") \ - EM(fscache_page_radix_clear_store, "RxCStr ") \ - EM(fscache_page_radix_delete, "RxDel ") \ - EM(fscache_page_radix_insert, "RxIns ") \ - EM(fscache_page_radix_pend2store, "RxP2S ") \ - EM(fscache_page_radix_set_pend, "RxSPend ") \ - EM(fscache_page_write, "WritePg") \ - EM(fscache_page_write_end, "EndPgWr") \ - EM(fscache_page_write_end_pend, "EndPgWP") \ - EM(fscache_page_write_end_noc, "EndPgNC") \ - E_(fscache_page_write_wait, "WtOnWrt") - -#define fscache_op_traces \ - EM(fscache_op_cancel, "Cancel1") \ - EM(fscache_op_cancel_all, "CancelA") \ - EM(fscache_op_cancelled, "Canclld") \ - EM(fscache_op_completed, "Complet") \ - EM(fscache_op_enqueue_async, "EnqAsyn") \ - EM(fscache_op_enqueue_mythread, "EnqMyTh") \ - EM(fscache_op_gc, "GC ") \ - EM(fscache_op_init, "Init ") \ - EM(fscache_op_put, "Put ") \ - EM(fscache_op_run, "Run ") \ - EM(fscache_op_signal, "Signal ") \ - EM(fscache_op_submit, "Submit ") \ - EM(fscache_op_submit_ex, "SubmitX") \ - E_(fscache_op_work, "Work ") - -#define fscache_page_op_traces \ - EM(fscache_page_op_alloc_one, "Alloc1 ") \ - EM(fscache_page_op_attr_changed, "AttrChg") \ - EM(fscache_page_op_check_consistency, "CheckCn") \ - EM(fscache_page_op_invalidate, "Inval ") \ - EM(fscache_page_op_retr_multi, "RetrMul") \ - EM(fscache_page_op_retr_one, "Retr1 ") \ - E_(fscache_page_op_write_one, "Write1 ") - -/* - * Export enum symbols via userspace. - */ -#undef EM -#undef E_ -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define E_(a, b) TRACE_DEFINE_ENUM(a); - -fscache_cookie_traces; - -/* - * Now redefine the EM() and E_() macros to map the enums to the strings that - * will be printed in the output. - */ -#undef EM -#undef E_ -#define EM(a, b) { a, b }, -#define E_(a, b) { a, b } - - -TRACE_EVENT(fscache_cookie, - TP_PROTO(unsigned int cookie_debug_id, - int ref, - enum fscache_cookie_trace where), - - TP_ARGS(cookie_debug_id, ref, where), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(enum fscache_cookie_trace, where ) - __field(int, ref ) - ), - - TP_fast_assign( - __entry->cookie = cookie_debug_id; - __entry->where = where; - __entry->ref = ref; - ), - - TP_printk("%s c=%08x r=%d", - __print_symbolic(__entry->where, fscache_cookie_traces), - __entry->cookie, __entry->ref) - ); - -TRACE_EVENT(fscache_netfs, - TP_PROTO(struct fscache_netfs *netfs), - - TP_ARGS(netfs), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __array(char, name, 8 ) - ), - - TP_fast_assign( - __entry->cookie = netfs->primary_index->debug_id; - strncpy(__entry->name, netfs->name, 8); - __entry->name[7] = 0; - ), - - TP_printk("c=%08x n=%s", - __entry->cookie, __entry->name) - ); - -TRACE_EVENT(fscache_acquire, - TP_PROTO(struct fscache_cookie *cookie), - - TP_ARGS(cookie), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(unsigned int, parent ) - __array(char, name, 8 ) - __field(int, p_ref ) - __field(int, p_n_children ) - __field(u8, p_flags ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->parent = cookie->parent->debug_id; - __entry->p_ref = refcount_read(&cookie->parent->ref); - __entry->p_n_children = atomic_read(&cookie->parent->n_children); - __entry->p_flags = cookie->parent->flags; - memcpy(__entry->name, cookie->def->name, 8); - __entry->name[7] = 0; - ), - - TP_printk("c=%08x p=%08x pr=%d pc=%d pf=%02x n=%s", - __entry->cookie, __entry->parent, __entry->p_ref, - __entry->p_n_children, __entry->p_flags, __entry->name) - ); - -TRACE_EVENT(fscache_relinquish, - TP_PROTO(struct fscache_cookie *cookie, bool retire), - - TP_ARGS(cookie, retire), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(unsigned int, parent ) - __field(int, ref ) - __field(int, n_children ) - __field(int, n_active ) - __field(u8, flags ) - __field(bool, retire ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->parent = cookie->parent->debug_id; - __entry->ref = refcount_read(&cookie->ref); - __entry->n_children = atomic_read(&cookie->n_children); - __entry->n_active = atomic_read(&cookie->n_active); - __entry->flags = cookie->flags; - __entry->retire = retire; - ), - - TP_printk("c=%08x r=%d p=%08x Nc=%d Na=%d f=%02x r=%u", - __entry->cookie, __entry->ref, - __entry->parent, __entry->n_children, __entry->n_active, - __entry->flags, __entry->retire) - ); - -TRACE_EVENT(fscache_enable, - TP_PROTO(struct fscache_cookie *cookie), - - TP_ARGS(cookie), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(int, ref ) - __field(int, n_children ) - __field(int, n_active ) - __field(u8, flags ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->ref = refcount_read(&cookie->ref); - __entry->n_children = atomic_read(&cookie->n_children); - __entry->n_active = atomic_read(&cookie->n_active); - __entry->flags = cookie->flags; - ), - - TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x", - __entry->cookie, __entry->ref, - __entry->n_children, __entry->n_active, __entry->flags) - ); - -TRACE_EVENT(fscache_disable, - TP_PROTO(struct fscache_cookie *cookie), - - TP_ARGS(cookie), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(int, ref ) - __field(int, n_children ) - __field(int, n_active ) - __field(u8, flags ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->ref = refcount_read(&cookie->ref); - __entry->n_children = atomic_read(&cookie->n_children); - __entry->n_active = atomic_read(&cookie->n_active); - __entry->flags = cookie->flags; - ), - - TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x", - __entry->cookie, __entry->ref, - __entry->n_children, __entry->n_active, __entry->flags) - ); - -TRACE_EVENT(fscache_osm, - TP_PROTO(struct fscache_object *object, - const struct fscache_state *state, - bool wait, bool oob, s8 event_num), - - TP_ARGS(object, state, wait, oob, event_num), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(unsigned int, object ) - __array(char, state, 8 ) - __field(bool, wait ) - __field(bool, oob ) - __field(s8, event_num ) - ), - - TP_fast_assign( - __entry->cookie = object->cookie->debug_id; - __entry->object = object->debug_id; - __entry->wait = wait; - __entry->oob = oob; - __entry->event_num = event_num; - memcpy(__entry->state, state->short_name, 8); - ), - - TP_printk("c=%08x o=%08d %s %s%sev=%d", - __entry->cookie, - __entry->object, - __entry->state, - __print_symbolic(__entry->wait, - { true, "WAIT" }, - { false, "WORK" }), - __print_symbolic(__entry->oob, - { true, " OOB " }, - { false, " " }), - __entry->event_num) - ); - -TRACE_EVENT(fscache_page, - TP_PROTO(struct fscache_cookie *cookie, struct page *page, - enum fscache_page_trace why), - - TP_ARGS(cookie, page, why), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(pgoff_t, page ) - __field(enum fscache_page_trace, why ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->page = page->index; - __entry->why = why; - ), - - TP_printk("c=%08x %s pg=%lx", - __entry->cookie, - __print_symbolic(__entry->why, fscache_page_traces), - __entry->page) - ); - -TRACE_EVENT(fscache_check_page, - TP_PROTO(struct fscache_cookie *cookie, struct page *page, - void *val, int n), - - TP_ARGS(cookie, page, val, n), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(void *, page ) - __field(void *, val ) - __field(int, n ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->page = page; - __entry->val = val; - __entry->n = n; - ), - - TP_printk("c=%08x pg=%p val=%p n=%d", - __entry->cookie, __entry->page, __entry->val, __entry->n) - ); - -TRACE_EVENT(fscache_wake_cookie, - TP_PROTO(struct fscache_cookie *cookie), - - TP_ARGS(cookie), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - ), - - TP_printk("c=%08x", __entry->cookie) - ); - -TRACE_EVENT(fscache_op, - TP_PROTO(struct fscache_cookie *cookie, struct fscache_operation *op, - enum fscache_op_trace why), - - TP_ARGS(cookie, op, why), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(unsigned int, op ) - __field(enum fscache_op_trace, why ) - ), - - TP_fast_assign( - __entry->cookie = cookie ? cookie->debug_id : 0; - __entry->op = op->debug_id; - __entry->why = why; - ), - - TP_printk("c=%08x op=%08x %s", - __entry->cookie, __entry->op, - __print_symbolic(__entry->why, fscache_op_traces)) - ); - -TRACE_EVENT(fscache_page_op, - TP_PROTO(struct fscache_cookie *cookie, struct page *page, - struct fscache_operation *op, enum fscache_page_op_trace what), - - TP_ARGS(cookie, page, op, what), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(unsigned int, op ) - __field(pgoff_t, page ) - __field(enum fscache_page_op_trace, what ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->page = page ? page->index : 0; - __entry->op = op->debug_id; - __entry->what = what; - ), - - TP_printk("c=%08x %s pg=%lx op=%08x", - __entry->cookie, - __print_symbolic(__entry->what, fscache_page_op_traces), - __entry->page, __entry->op) - ); - -TRACE_EVENT(fscache_wrote_page, - TP_PROTO(struct fscache_cookie *cookie, struct page *page, - struct fscache_operation *op, int ret), - - TP_ARGS(cookie, page, op, ret), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(unsigned int, op ) - __field(pgoff_t, page ) - __field(int, ret ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->page = page->index; - __entry->op = op->debug_id; - __entry->ret = ret; - ), - - TP_printk("c=%08x pg=%lx op=%08x ret=%d", - __entry->cookie, __entry->page, __entry->op, __entry->ret) - ); - -TRACE_EVENT(fscache_gang_lookup, - TP_PROTO(struct fscache_cookie *cookie, struct fscache_operation *op, - void **results, int n, pgoff_t store_limit), - - TP_ARGS(cookie, op, results, n, store_limit), - - TP_STRUCT__entry( - __field(unsigned int, cookie ) - __field(unsigned int, op ) - __field(pgoff_t, results0 ) - __field(int, n ) - __field(pgoff_t, store_limit ) - ), - - TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->op = op->debug_id; - __entry->results0 = results[0] ? ((struct page *)results[0])->index : (pgoff_t)-1; - __entry->n = n; - __entry->store_limit = store_limit; - ), - - TP_printk("c=%08x op=%08x r0=%lx n=%d sl=%lx", - __entry->cookie, __entry->op, __entry->results0, __entry->n, - __entry->store_limit) - ); - -#endif /* _TRACE_FSCACHE_H */ - -/* This part must be outside protection */ -#include -- cgit v1.2.3 From 9e1aa6b8f484dde5ada1212092d20ea3f55c24e8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 Oct 2021 12:05:27 +0100 Subject: netfs: Display the netfs inode number in the netfs_read tracepoint Display the netfs inode number in the netfs_read tracepoint so that this can be used to correlate with the cachefiles_prep_read tracepoint. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819581097.215744.17476611915583897051.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906885903.143852.12229407815154182247.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967078164.1823006.15286989199782861123.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021487412.640689.7544388469390936443.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/netfs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 4d470bffd9f1..e6f4ebbb4c69 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -135,6 +135,7 @@ TRACE_EVENT(netfs_read, __field(loff_t, start ) __field(size_t, len ) __field(enum netfs_read_trace, what ) + __field(unsigned int, netfs_inode ) ), TP_fast_assign( @@ -143,12 +144,14 @@ TRACE_EVENT(netfs_read, __entry->start = start; __entry->len = len; __entry->what = what; + __entry->netfs_inode = rreq->inode->i_ino; ), - TP_printk("R=%08x %s c=%08x s=%llx %zx", + TP_printk("R=%08x %s c=%08x ni=%x s=%llx %zx", __entry->rreq, __print_symbolic(__entry->what, netfs_read_traces), __entry->cookie, + __entry->netfs_inode, __entry->start, __entry->len) ); -- cgit v1.2.3 From a39c41b853ee51f4dcd19f5556f860ae8e2f23d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 14:30:37 +0100 Subject: netfs: Pass a flag to ->prepare_write() to say if there's no alloc'd space Pass a flag to ->prepare_write() to indicate if there's definitely no space allocated in the cache yet (for instance if we've already checked as we were asked to do a read). Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819583123.215744.12783808230464471417.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906886835.143852.6689886781122679769.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967079100.1823006.12889542712309574359.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021489334.640689.3131206613015409076.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/netfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index ca0683b9e3d1..1ea22fc48818 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -232,7 +232,8 @@ struct netfs_cache_ops { * actually do. */ int (*prepare_write)(struct netfs_cache_resources *cres, - loff_t *_start, size_t *_len, loff_t i_size); + loff_t *_start, size_t *_len, loff_t i_size, + bool no_space_allocated_yet); }; struct readahead_control; -- cgit v1.2.3 From 1e1236b841166f1d2daf36fdf6bb3e656bc5f5ca Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 14:34:41 +0100 Subject: fscache: Introduce new driver Introduce basic skeleton of the new, rewritten fscache driver. Changes ======= ver #3: - Use remove_proc_subtree(), not remove_proc_entry() to remove a populated dir. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819584034.215744.4290533472390439030.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906887770.143852.3577888294989185666.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967080039.1823006.5702921801104057922.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021491014.640689.4292699878317589512.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 2 ++ include/linux/fscache.h | 6 +++++- include/trace/events/fscache.h | 49 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/fscache.h (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 47f21a53ac4b..d6910a913918 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -16,4 +16,6 @@ #include +extern struct workqueue_struct *fscache_wq; + #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 0364a4ca16f6..1cf90c252aac 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* General filesystem caching interface * - * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * NOTE!!! See: @@ -18,9 +18,13 @@ #include #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +#define __fscache_available (1) +#define fscache_available() (1) #define fscache_cookie_valid(cookie) (cookie) #define fscache_cookie_enabled(cookie) (cookie) #else +#define __fscache_available (0) +#define fscache_available() (0) #define fscache_cookie_valid(cookie) (0) #define fscache_cookie_enabled(cookie) (0) #endif diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h new file mode 100644 index 000000000000..fe214c5cc87f --- /dev/null +++ b/include/trace/events/fscache.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* FS-Cache tracepoints + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fscache + +#if !defined(_TRACE_FSCACHE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FSCACHE_H + +#include +#include + +/* + * Define enums for tracing information. + */ +#ifndef __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY + +#endif + +/* + * Declare tracing information enums and their string mappings for display. + */ + +/* + * Export enum symbols via userspace. + */ +#undef EM +#undef E_ +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define E_(a, b) TRACE_DEFINE_ENUM(a); + +/* + * Now redefine the EM() and E_() macros to map the enums to the strings that + * will be printed in the output. + */ +#undef EM +#undef E_ +#define EM(a, b) { a, b }, +#define E_(a, b) { a, b } + + +#endif /* _TRACE_FSCACHE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 9549332df4ed4e761a1d41c83f2c25d28bb22431 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:00:26 +0100 Subject: fscache: Implement cache registration Implement a register of caches and provide functions to manage it. Two functions are provided for the cache backend to use: (1) Acquire a cache cookie: struct fscache_cache *fscache_acquire_cache(const char *name) This gets the cache cookie for a cache of the specified name and moves it to the preparation state. If a nameless cache cookie exists, that will be given this name and used. (2) Relinquish a cache cookie: void fscache_relinquish_cache(struct fscache_cache *cache); This relinquishes a cache cookie, cleans it and makes it available if it's still referenced by a network filesystem. Note that network filesystems don't deal with cache cookies directly, but rather go straight to the volume registration. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819587157.215744.13523139317322503286.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906889665.143852.10378009165231294456.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967085081.1823006.2218944206363626210.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021494847.640689.10109692261640524343.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 34 +++++++++++++++++++++++++++++++++ include/trace/events/fscache.h | 43 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index d6910a913918..18cd5c9877bb 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -16,6 +16,40 @@ #include +enum fscache_cache_trace; +enum fscache_access_trace; + +enum fscache_cache_state { + FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */ + FSCACHE_CACHE_IS_PREPARING, /* A cache is preparing to come live */ + FSCACHE_CACHE_IS_ACTIVE, /* Attached cache is active and can be used */ + FSCACHE_CACHE_GOT_IOERROR, /* Attached cache stopped on I/O error */ + FSCACHE_CACHE_IS_WITHDRAWN, /* Attached cache is being withdrawn */ +#define NR__FSCACHE_CACHE_STATE (FSCACHE_CACHE_IS_WITHDRAWN + 1) +}; + +/* + * Cache cookie. + */ +struct fscache_cache { + struct list_head cache_link; /* Link in cache list */ + void *cache_priv; /* Private cache data (or NULL) */ + refcount_t ref; + atomic_t n_volumes; /* Number of active volumes; */ + atomic_t n_accesses; /* Number of in-progress accesses on the cache */ + atomic_t object_count; /* no. of live objects in this cache */ + unsigned int debug_id; + enum fscache_cache_state state; + char *name; +}; + extern struct workqueue_struct *fscache_wq; +/* + * out-of-line cache backend functions + */ +extern struct rw_semaphore fscache_addremove_sem; +extern struct fscache_cache *fscache_acquire_cache(const char *name); +extern void fscache_relinquish_cache(struct fscache_cache *cache); + #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index fe214c5cc87f..3b8e0597b2c1 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -19,11 +19,27 @@ #ifndef __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY +enum fscache_cache_trace { + fscache_cache_collision, + fscache_cache_get_acquire, + fscache_cache_new_acquire, + fscache_cache_put_cache, + fscache_cache_put_prep_failed, + fscache_cache_put_relinquish, +}; + #endif /* * Declare tracing information enums and their string mappings for display. */ +#define fscache_cache_traces \ + EM(fscache_cache_collision, "*COLLIDE*") \ + EM(fscache_cache_get_acquire, "GET acq ") \ + EM(fscache_cache_new_acquire, "NEW acq ") \ + EM(fscache_cache_put_cache, "PUT cache") \ + EM(fscache_cache_put_prep_failed, "PUT pfail") \ + E_(fscache_cache_put_relinquish, "PUT relnq") /* * Export enum symbols via userspace. @@ -33,6 +49,8 @@ #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +fscache_cache_traces; + /* * Now redefine the EM() and E_() macros to map the enums to the strings that * will be printed in the output. @@ -43,6 +61,31 @@ #define E_(a, b) { a, b } +TRACE_EVENT(fscache_cache, + TP_PROTO(unsigned int cache_debug_id, + int usage, + enum fscache_cache_trace where), + + TP_ARGS(cache_debug_id, usage, where), + + TP_STRUCT__entry( + __field(unsigned int, cache ) + __field(int, usage ) + __field(enum fscache_cache_trace, where ) + ), + + TP_fast_assign( + __entry->cache = cache_debug_id; + __entry->usage = usage; + __entry->where = where; + ), + + TP_printk("C=%08x %s r=%d", + __entry->cache, + __print_symbolic(__entry->where, fscache_cache_traces), + __entry->usage) + ); + #endif /* _TRACE_FSCACHE_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 62ab63352350e881ae693a8236b35d7d0516c78b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:26:17 +0100 Subject: fscache: Implement volume registration Add functions to the fscache API to allow volumes to be acquired and relinquished by the network filesystem. A volume is an index of data storage cache objects. A volume is represented by a volume cookie in the API. A filesystem would typically create a volume for a superblock and then create per-inode cookies within it. To request a volume, the filesystem calls: struct fscache_volume * fscache_acquire_volume(const char *volume_key, const char *cache_name, const void *coherency_data, size_t coherency_len) The volume_key is a printable string used to match the volume in the cache. It should not contain any '/' characters. For AFS, for example, this would be "afs,,", e.g. "afs,example.com,523001". The cache_name can be NULL, but if not it should be a string indicating the name of the cache to use if there's more than one available. The coherency data, if given, is an arbitrarily-sized blob that's attached to the volume and is compared when the volume is looked up. If it doesn't match, the old volume is judged to be out of date and it and everything within it is discarded. Acquiring a volume twice concurrently is disallowed, though the function will wait if an old volume cookie is being relinquishing. When a network filesystem has finished with a volume, it should return the volume cookie by calling: void fscache_relinquish_volume(struct fscache_volume *volume, const void *coherency_data, bool invalidate) If invalidate is true, the entire volume will be discarded; if false, the volume will be synced and the coherency data will be updated. Changes ======= ver #4: - Removed an extraneous param from kdoc on fscache_relinquish_volume()[3]. ver #3: - fscache_hash()'s size parameter is now in bytes. Use __le32 as the unit to round up to. - When comparing cookies, simply see if the attributes are the same rather than subtracting them to produce a strcmp-style return[2]. - Make the coherency data an arbitrary blob rather than a u64, but don't store it for the moment. ver #2: - Fix error check[1]. - Make a fscache_acquire_volume() return errors, including EBUSY if a conflicting volume cookie already exists. No error is printed now - that's left to the netfs. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/20211203095608.GC2480@kili/ [1] Link: https://lore.kernel.org/r/CAHk-=whtkzB446+hX0zdLsdcUJsJ=8_-0S1mE_R+YurThfUbLA@mail.gmail.com/ [2] Link: https://lore.kernel.org/r/20211220224646.30e8205c@canb.auug.org.au/ [3] Link: https://lore.kernel.org/r/163819588944.215744.1629085755564865996.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906890630.143852.13972180614535611154.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967086836.1823006.8191672796841981763.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021495816.640689.4403156093668590217.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache.h | 84 ++++++++++++++++++++++++++++++++++++++++++ include/trace/events/fscache.h | 61 +++++++++++++++++++++++++++++- 2 files changed, 144 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 1cf90c252aac..131a741a6652 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -20,13 +20,97 @@ #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) #define __fscache_available (1) #define fscache_available() (1) +#define fscache_volume_valid(volume) (volume) #define fscache_cookie_valid(cookie) (cookie) #define fscache_cookie_enabled(cookie) (cookie) #else #define __fscache_available (0) #define fscache_available() (0) +#define fscache_volume_valid(volume) (0) #define fscache_cookie_valid(cookie) (0) #define fscache_cookie_enabled(cookie) (0) #endif +/* + * Volume representation cookie. + */ +struct fscache_volume { + refcount_t ref; + atomic_t n_cookies; /* Number of data cookies in volume */ + atomic_t n_accesses; /* Number of cache accesses in progress */ + unsigned int debug_id; + unsigned int key_hash; /* Hash of key string */ + char *key; /* Volume ID, eg. "afs@example.com@1234" */ + struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */ + struct hlist_bl_node hash_link; /* Link in hash table */ + struct work_struct work; + struct fscache_cache *cache; /* The cache in which this resides */ + void *cache_priv; /* Cache private data */ + spinlock_t lock; + unsigned long flags; +#define FSCACHE_VOLUME_RELINQUISHED 0 /* Volume is being cleaned up */ +#define FSCACHE_VOLUME_INVALIDATE 1 /* Volume was invalidated */ +#define FSCACHE_VOLUME_COLLIDED_WITH 2 /* Volume was collided with */ +#define FSCACHE_VOLUME_ACQUIRE_PENDING 3 /* Volume is waiting to complete acquisition */ +#define FSCACHE_VOLUME_CREATING 4 /* Volume is being created on disk */ +}; + +/* + * slow-path functions for when there is actually caching available, and the + * netfs does actually have a valid token + * - these are not to be called directly + * - these are undefined symbols when FS-Cache is not configured and the + * optimiser takes care of not using them + */ +extern struct fscache_volume *__fscache_acquire_volume(const char *, const char *, + const void *, size_t); +extern void __fscache_relinquish_volume(struct fscache_volume *, const void *, bool); + +/** + * fscache_acquire_volume - Register a volume as desiring caching services + * @volume_key: An identification string for the volume + * @cache_name: The name of the cache to use (or NULL for the default) + * @coherency_data: Piece of arbitrary coherency data to check (or NULL) + * @coherency_len: The size of the coherency data + * + * Register a volume as desiring caching services if they're available. The + * caller must provide an identifier for the volume and may also indicate which + * cache it should be in. If a preexisting volume entry is found in the cache, + * the coherency data must match otherwise the entry will be invalidated. + * + * Returns a cookie pointer on success, -ENOMEM if out of memory or -EBUSY if a + * cache volume of that name is already acquired. Note that "NULL" is a valid + * cookie pointer and can be returned if caching is refused. + */ +static inline +struct fscache_volume *fscache_acquire_volume(const char *volume_key, + const char *cache_name, + const void *coherency_data, + size_t coherency_len) +{ + if (!fscache_available()) + return NULL; + return __fscache_acquire_volume(volume_key, cache_name, + coherency_data, coherency_len); +} + +/** + * fscache_relinquish_volume - Cease caching a volume + * @volume: The volume cookie + * @coherency_data: Piece of arbitrary coherency data to set (or NULL) + * @invalidate: True if the volume should be invalidated + * + * Indicate that a filesystem no longer desires caching services for a volume. + * The caller must have relinquished all file cookies prior to calling this. + * The stored coherency data is updated. + */ +static inline +void fscache_relinquish_volume(struct fscache_volume *volume, + const void *coherency_data, + bool invalidate) +{ + if (fscache_volume_valid(volume)) + __fscache_relinquish_volume(volume, coherency_data, invalidate); +} + #endif /* _LINUX_FSCACHE_H */ diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 3b8e0597b2c1..eeb3e7d88e20 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -23,9 +23,26 @@ enum fscache_cache_trace { fscache_cache_collision, fscache_cache_get_acquire, fscache_cache_new_acquire, + fscache_cache_put_alloc_volume, fscache_cache_put_cache, fscache_cache_put_prep_failed, fscache_cache_put_relinquish, + fscache_cache_put_volume, +}; + +enum fscache_volume_trace { + fscache_volume_collision, + fscache_volume_get_cookie, + fscache_volume_get_create_work, + fscache_volume_get_hash_collision, + fscache_volume_free, + fscache_volume_new_acquire, + fscache_volume_put_cookie, + fscache_volume_put_create_work, + fscache_volume_put_hash_collision, + fscache_volume_put_relinquish, + fscache_volume_see_create_work, + fscache_volume_see_hash_wake, }; #endif @@ -37,9 +54,25 @@ enum fscache_cache_trace { EM(fscache_cache_collision, "*COLLIDE*") \ EM(fscache_cache_get_acquire, "GET acq ") \ EM(fscache_cache_new_acquire, "NEW acq ") \ + EM(fscache_cache_put_alloc_volume, "PUT alvol") \ EM(fscache_cache_put_cache, "PUT cache") \ EM(fscache_cache_put_prep_failed, "PUT pfail") \ - E_(fscache_cache_put_relinquish, "PUT relnq") + EM(fscache_cache_put_relinquish, "PUT relnq") \ + E_(fscache_cache_put_volume, "PUT vol ") + +#define fscache_volume_traces \ + EM(fscache_volume_collision, "*COLLIDE*") \ + EM(fscache_volume_get_cookie, "GET cook ") \ + EM(fscache_volume_get_create_work, "GET creat") \ + EM(fscache_volume_get_hash_collision, "GET hcoll") \ + EM(fscache_volume_free, "FREE ") \ + EM(fscache_volume_new_acquire, "NEW acq ") \ + EM(fscache_volume_put_cookie, "PUT cook ") \ + EM(fscache_volume_put_create_work, "PUT creat") \ + EM(fscache_volume_put_hash_collision, "PUT hcoll") \ + EM(fscache_volume_put_relinquish, "PUT relnq") \ + EM(fscache_volume_see_create_work, "SEE creat") \ + E_(fscache_volume_see_hash_wake, "SEE hwake") /* * Export enum symbols via userspace. @@ -50,6 +83,7 @@ enum fscache_cache_trace { #define E_(a, b) TRACE_DEFINE_ENUM(a); fscache_cache_traces; +fscache_volume_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -86,6 +120,31 @@ TRACE_EVENT(fscache_cache, __entry->usage) ); +TRACE_EVENT(fscache_volume, + TP_PROTO(unsigned int volume_debug_id, + int usage, + enum fscache_volume_trace where), + + TP_ARGS(volume_debug_id, usage, where), + + TP_STRUCT__entry( + __field(unsigned int, volume ) + __field(int, usage ) + __field(enum fscache_volume_trace, where ) + ), + + TP_fast_assign( + __entry->volume = volume_debug_id; + __entry->usage = usage; + __entry->where = where; + ), + + TP_printk("V=%08x %s u=%d", + __entry->volume, + __print_symbolic(__entry->where, fscache_volume_traces), + __entry->usage) + ); + #endif /* _TRACE_FSCACHE_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 7f3283aba39a0f395700c3b5defa4ec49d9914b3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:53:34 +0100 Subject: fscache: Implement cookie registration Add functions to the fscache API to allow data file cookies to be acquired and relinquished by the network filesystem. It is intended that the filesystem will create such cookies per-inode under a volume. To request a cookie, the filesystem should call: struct fscache_cookie * fscache_acquire_cookie(struct fscache_volume *volume, u8 advice, const void *index_key, size_t index_key_len, const void *aux_data, size_t aux_data_len, loff_t object_size) The filesystem must first have created a volume cookie, which is passed in here. If it passes in NULL then the function will just return a NULL cookie. A binary key should be passed in index_key and is of size index_key_len. This is saved in the cookie and is used to locate the associated data in the cache. A coherency data buffer of size aux_data_len will be allocated and initialised from the buffer pointed to by aux_data. This is used to validate cache objects when they're opened and is stored on disk with them when they're committed. The data is stored in the cookie and will be updateable by various functions in later patches. The object_size must also be given. This is also used to perform a coherency check and to size the backing storage appropriately. This function disallows a cookie from being acquired twice in parallel, though it will cause the second user to wait if the first is busy relinquishing its cookie. When a network filesystem has finished with a cookie, it should call: void fscache_relinquish_cookie(struct fscache_volume *volume, bool retire) If retire is true, any backing data will be discarded immediately. Changes ======= ver #3: - fscache_hash()'s size parameter is now in bytes. Use __le32 as the unit to round up to. - When comparing cookies, simply see if the attributes are the same rather than subtracting them to produce a strcmp-style return[1]. - Add a check to see if the cookie is still hashed at the point of freeing. ver #2: - Don't hold n_accesses elevated whilst cache is bound to a cookie, but rather add a flag that prevents the state machine from being queued when n_accesses reaches 0. - Remove the unused cookie pointer field from the fscache_acquire tracepoint. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/CAHk-=whtkzB446+hX0zdLsdcUJsJ=8_-0S1mE_R+YurThfUbLA@mail.gmail.com/ [1] Link: https://lore.kernel.org/r/163819590658.215744.14934902514281054323.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906891983.143852.6219772337558577395.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967088507.1823006.12659006350221417165.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021498432.640689.12743483856927722772.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 22 +++++++ include/linux/fscache.h | 134 +++++++++++++++++++++++++++++++++++++++++ include/trace/events/fscache.h | 111 ++++++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 18cd5c9877bb..c4355b888c91 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -17,6 +17,7 @@ #include enum fscache_cache_trace; +enum fscache_cookie_trace; enum fscache_access_trace; enum fscache_cache_state { @@ -52,4 +53,25 @@ extern struct rw_semaphore fscache_addremove_sem; extern struct fscache_cache *fscache_acquire_cache(const char *name); extern void fscache_relinquish_cache(struct fscache_cache *cache); +extern struct fscache_cookie *fscache_get_cookie(struct fscache_cookie *cookie, + enum fscache_cookie_trace where); +extern void fscache_put_cookie(struct fscache_cookie *cookie, + enum fscache_cookie_trace where); +extern void fscache_set_cookie_state(struct fscache_cookie *cookie, + enum fscache_cookie_state state); + +/** + * fscache_get_key - Get a pointer to the cookie key + * @cookie: The cookie to query + * + * Return a pointer to the where a cookie's key is stored. + */ +static inline void *fscache_get_key(struct fscache_cookie *cookie) +{ + if (cookie->key_len <= sizeof(cookie->inline_key)) + return cookie->inline_key; + else + return cookie->key; +} + #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 131a741a6652..4450d17c11e8 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -31,6 +31,27 @@ #define fscache_cookie_enabled(cookie) (0) #endif +struct fscache_cookie; + +#define FSCACHE_ADV_SINGLE_CHUNK 0x01 /* The object is a single chunk of data */ +#define FSCACHE_ADV_WRITE_CACHE 0x00 /* Do cache if written to locally */ +#define FSCACHE_ADV_WRITE_NOCACHE 0x02 /* Don't cache if written to locally */ + +/* + * Data object state. + */ +enum fscache_cookie_state { + FSCACHE_COOKIE_STATE_QUIESCENT, /* The cookie is uncached */ + FSCACHE_COOKIE_STATE_LOOKING_UP, /* The cache object is being looked up */ + FSCACHE_COOKIE_STATE_CREATING, /* The cache object is being created */ + FSCACHE_COOKIE_STATE_ACTIVE, /* The cache is active, readable and writable */ + FSCACHE_COOKIE_STATE_FAILED, /* The cache failed, withdraw to clear */ + FSCACHE_COOKIE_STATE_WITHDRAWING, /* The cookie is being withdrawn */ + FSCACHE_COOKIE_STATE_RELINQUISHING, /* The cookie is being relinquished */ + FSCACHE_COOKIE_STATE_DROPPED, /* The cookie has been dropped */ +#define FSCACHE_COOKIE_STATE__NR (FSCACHE_COOKIE_STATE_DROPPED + 1) +} __attribute__((mode(byte))); + /* * Volume representation cookie. */ @@ -55,6 +76,60 @@ struct fscache_volume { #define FSCACHE_VOLUME_CREATING 4 /* Volume is being created on disk */ }; +/* + * Data file representation cookie. + * - a file will only appear in one cache + * - a request to cache a file may or may not be honoured, subject to + * constraints such as disk space + * - indices are created on disk just-in-time + */ +struct fscache_cookie { + refcount_t ref; + atomic_t n_active; /* number of active users of cookie */ + atomic_t n_accesses; /* Number of cache accesses in progress */ + unsigned int debug_id; + unsigned int inval_counter; /* Number of invalidations made */ + spinlock_t lock; + struct fscache_volume *volume; /* Parent volume of this file. */ + void *cache_priv; /* Cache-side representation */ + struct hlist_bl_node hash_link; /* Link in hash table */ + struct list_head proc_link; /* Link in proc list */ + struct list_head commit_link; /* Link in commit queue */ + struct work_struct work; /* Commit/relinq/withdraw work */ + loff_t object_size; /* Size of the netfs object */ + unsigned long unused_at; /* Time at which unused (jiffies) */ + unsigned long flags; +#define FSCACHE_COOKIE_RELINQUISHED 0 /* T if cookie has been relinquished */ +#define FSCACHE_COOKIE_RETIRED 1 /* T if this cookie has retired on relinq */ +#define FSCACHE_COOKIE_IS_CACHING 2 /* T if this cookie is cached */ +#define FSCACHE_COOKIE_NO_DATA_TO_READ 3 /* T if this cookie has nothing to read */ +#define FSCACHE_COOKIE_NEEDS_UPDATE 4 /* T if attrs have been updated */ +#define FSCACHE_COOKIE_HAS_BEEN_CACHED 5 /* T if cookie needs withdraw-on-relinq */ +#define FSCACHE_COOKIE_DISABLED 6 /* T if cookie has been disabled */ +#define FSCACHE_COOKIE_LOCAL_WRITE 7 /* T if cookie has been modified locally */ +#define FSCACHE_COOKIE_NO_ACCESS_WAKE 8 /* T if no wake when n_accesses goes 0 */ +#define FSCACHE_COOKIE_DO_RELINQUISH 9 /* T if this cookie needs relinquishment */ +#define FSCACHE_COOKIE_DO_WITHDRAW 10 /* T if this cookie needs withdrawing */ +#define FSCACHE_COOKIE_DO_LRU_DISCARD 11 /* T if this cookie needs LRU discard */ +#define FSCACHE_COOKIE_DO_PREP_TO_WRITE 12 /* T if cookie needs write preparation */ +#define FSCACHE_COOKIE_HAVE_DATA 13 /* T if this cookie has data stored */ +#define FSCACHE_COOKIE_IS_HASHED 14 /* T if this cookie is hashed */ + + enum fscache_cookie_state state; + u8 advice; /* FSCACHE_ADV_* */ + u8 key_len; /* Length of index key */ + u8 aux_len; /* Length of auxiliary data */ + u32 key_hash; /* Hash of volume, key, len */ + union { + void *key; /* Index key */ + u8 inline_key[16]; /* - If the key is short enough */ + }; + union { + void *aux; /* Auxiliary data */ + u8 inline_aux[8]; /* - If the aux data is short enough */ + }; +}; + /* * slow-path functions for when there is actually caching available, and the * netfs does actually have a valid token @@ -66,6 +141,14 @@ extern struct fscache_volume *__fscache_acquire_volume(const char *, const char const void *, size_t); extern void __fscache_relinquish_volume(struct fscache_volume *, const void *, bool); +extern struct fscache_cookie *__fscache_acquire_cookie( + struct fscache_volume *, + u8, + const void *, size_t, + const void *, size_t, + loff_t); +extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); + /** * fscache_acquire_volume - Register a volume as desiring caching services * @volume_key: An identification string for the volume @@ -113,4 +196,55 @@ void fscache_relinquish_volume(struct fscache_volume *volume, __fscache_relinquish_volume(volume, coherency_data, invalidate); } +/** + * fscache_acquire_cookie - Acquire a cookie to represent a cache object + * @volume: The volume in which to locate/create this cookie + * @advice: Advice flags (FSCACHE_COOKIE_ADV_*) + * @index_key: The index key for this cookie + * @index_key_len: Size of the index key + * @aux_data: The auxiliary data for the cookie (may be NULL) + * @aux_data_len: Size of the auxiliary data buffer + * @object_size: The initial size of object + * + * Acquire a cookie to represent a data file within the given cache volume. + * + * See Documentation/filesystems/caching/netfs-api.rst for a complete + * description. + */ +static inline +struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume, + u8 advice, + const void *index_key, + size_t index_key_len, + const void *aux_data, + size_t aux_data_len, + loff_t object_size) +{ + if (!fscache_volume_valid(volume)) + return NULL; + return __fscache_acquire_cookie(volume, advice, + index_key, index_key_len, + aux_data, aux_data_len, + object_size); +} + +/** + * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding + * it + * @cookie: The cookie being returned + * @retire: True if the cache object the cookie represents is to be discarded + * + * This function returns a cookie to the cache, forcibly discarding the + * associated cache object if retire is set to true. + * + * See Documentation/filesystems/caching/netfs-api.rst for a complete + * description. + */ +static inline +void fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) +{ + if (fscache_cookie_valid(cookie)) + __fscache_relinquish_cookie(cookie, retire); +} + #endif /* _LINUX_FSCACHE_H */ diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index eeb3e7d88e20..9286e1c4b2ac 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -45,6 +45,23 @@ enum fscache_volume_trace { fscache_volume_see_hash_wake, }; +enum fscache_cookie_trace { + fscache_cookie_collision, + fscache_cookie_discard, + fscache_cookie_get_end_access, + fscache_cookie_get_hash_collision, + fscache_cookie_new_acquire, + fscache_cookie_put_hash_collision, + fscache_cookie_put_over_queued, + fscache_cookie_put_relinquish, + fscache_cookie_put_withdrawn, + fscache_cookie_put_work, + fscache_cookie_see_active, + fscache_cookie_see_relinquish, + fscache_cookie_see_withdraw, + fscache_cookie_see_work, +}; + #endif /* @@ -74,6 +91,22 @@ enum fscache_volume_trace { EM(fscache_volume_see_create_work, "SEE creat") \ E_(fscache_volume_see_hash_wake, "SEE hwake") +#define fscache_cookie_traces \ + EM(fscache_cookie_collision, "*COLLIDE*") \ + EM(fscache_cookie_discard, "DISCARD ") \ + EM(fscache_cookie_get_hash_collision, "GET hcoll") \ + EM(fscache_cookie_get_end_access, "GQ endac") \ + EM(fscache_cookie_new_acquire, "NEW acq ") \ + EM(fscache_cookie_put_hash_collision, "PUT hcoll") \ + EM(fscache_cookie_put_over_queued, "PQ overq") \ + EM(fscache_cookie_put_relinquish, "PUT relnq") \ + EM(fscache_cookie_put_withdrawn, "PUT wthdn") \ + EM(fscache_cookie_put_work, "PQ work ") \ + EM(fscache_cookie_see_active, "- activ") \ + EM(fscache_cookie_see_relinquish, "- x-rlq") \ + EM(fscache_cookie_see_withdraw, "- x-wth") \ + E_(fscache_cookie_see_work, "- work ") + /* * Export enum symbols via userspace. */ @@ -84,6 +117,7 @@ enum fscache_volume_trace { fscache_cache_traces; fscache_volume_traces; +fscache_cookie_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -145,6 +179,83 @@ TRACE_EVENT(fscache_volume, __entry->usage) ); +TRACE_EVENT(fscache_cookie, + TP_PROTO(unsigned int cookie_debug_id, + int ref, + enum fscache_cookie_trace where), + + TP_ARGS(cookie_debug_id, ref, where), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(int, ref ) + __field(enum fscache_cookie_trace, where ) + ), + + TP_fast_assign( + __entry->cookie = cookie_debug_id; + __entry->ref = ref; + __entry->where = where; + ), + + TP_printk("c=%08x %s r=%d", + __entry->cookie, + __print_symbolic(__entry->where, fscache_cookie_traces), + __entry->ref) + ); + +TRACE_EVENT(fscache_acquire, + TP_PROTO(struct fscache_cookie *cookie), + + TP_ARGS(cookie), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(unsigned int, volume ) + __field(int, v_ref ) + __field(int, v_n_cookies ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + __entry->volume = cookie->volume->debug_id; + __entry->v_ref = refcount_read(&cookie->volume->ref); + __entry->v_n_cookies = atomic_read(&cookie->volume->n_cookies); + ), + + TP_printk("c=%08x V=%08x vr=%d vc=%d", + __entry->cookie, + __entry->volume, __entry->v_ref, __entry->v_n_cookies) + ); + +TRACE_EVENT(fscache_relinquish, + TP_PROTO(struct fscache_cookie *cookie, bool retire), + + TP_ARGS(cookie, retire), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(unsigned int, volume ) + __field(int, ref ) + __field(int, n_active ) + __field(u8, flags ) + __field(bool, retire ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + __entry->volume = cookie->volume->debug_id; + __entry->ref = refcount_read(&cookie->ref); + __entry->n_active = atomic_read(&cookie->n_active); + __entry->flags = cookie->flags; + __entry->retire = retire; + ), + + TP_printk("c=%08x V=%08x r=%d U=%d f=%02x rt=%u", + __entry->cookie, __entry->volume, __entry->ref, + __entry->n_active, __entry->flags, __entry->retire) + ); + #endif /* _TRACE_FSCACHE_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 23e12e285a6ab7320a8bceead29cfe13190a6e3c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:00:26 +0100 Subject: fscache: Implement cache-level access helpers Add a pair of functions to pin/unpin a cache that we're wanting to do a high-level access to (such as creating or removing a volume): bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why); void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why); The way the access gate works/will work is: (1) If the cache tests as not live (state is not FSCACHE_CACHE_IS_ACTIVE), then we return false to indicate access was not permitted. (2) If the cache tests as live, then we increment the n_accesses count and then recheck the liveness, ending the access if it ceased to be live. (3) When we end the access, we decrement n_accesses and wake up the any waiters if it reaches 0. (4) Whilst the cache is caching, n_accesses is kept artificially incremented to prevent wakeups from happening. (5) When the cache is taken offline, the state is changed to prevent new accesses, n_accesses is decremented and we wait for n_accesses to become 0. Note that some of this is implemented in a later patch. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819593239.215744.7537428720603638088.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906893368.143852.14164004598465617981.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967093977.1823006.6967886507023056409.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021499995.640689.18286203753480287850.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/fscache.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include') diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 9286e1c4b2ac..734966bc49e1 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -62,6 +62,12 @@ enum fscache_cookie_trace { fscache_cookie_see_work, }; +enum fscache_access_trace { + fscache_access_cache_pin, + fscache_access_cache_unpin, + fscache_access_unlive, +}; + #endif /* @@ -107,6 +113,11 @@ enum fscache_cookie_trace { EM(fscache_cookie_see_withdraw, "- x-wth") \ E_(fscache_cookie_see_work, "- work ") +#define fscache_access_traces \ + EM(fscache_access_cache_pin, "PIN cache ") \ + EM(fscache_access_cache_unpin, "UNPIN cache ") \ + E_(fscache_access_unlive, "END unlive ") + /* * Export enum symbols via userspace. */ @@ -118,6 +129,7 @@ enum fscache_cookie_trace { fscache_cache_traces; fscache_volume_traces; fscache_cookie_traces; +fscache_access_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -204,6 +216,35 @@ TRACE_EVENT(fscache_cookie, __entry->ref) ); +TRACE_EVENT(fscache_access_cache, + TP_PROTO(unsigned int cache_debug_id, + int ref, + int n_accesses, + enum fscache_access_trace why), + + TP_ARGS(cache_debug_id, ref, n_accesses, why), + + TP_STRUCT__entry( + __field(unsigned int, cache ) + __field(int, ref ) + __field(int, n_accesses ) + __field(enum fscache_access_trace, why ) + ), + + TP_fast_assign( + __entry->cache = cache_debug_id; + __entry->ref = ref; + __entry->n_accesses = n_accesses; + __entry->why = why; + ), + + TP_printk("C=%08x %s r=%d a=%d", + __entry->cache, + __print_symbolic(__entry->why, fscache_access_traces), + __entry->ref, + __entry->n_accesses) + ); + TRACE_EVENT(fscache_acquire, TP_PROTO(struct fscache_cookie *cookie), -- cgit v1.2.3 From e6acd3299badbfb5fb0231d42481d4f5dedf5599 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:26:17 +0100 Subject: fscache: Implement volume-level access helpers Add a pair of helper functions to manage access to a volume, pinning the volume in place for the duration to prevent cache withdrawal from removing it: bool fscache_begin_volume_access(struct fscache_volume *volume, enum fscache_access_trace why); void fscache_end_volume_access(struct fscache_volume *volume, enum fscache_access_trace why); The way the access gate on the volume works/will work is: (1) If the cache tests as not live (state is not FSCACHE_CACHE_IS_ACTIVE), then we return false to indicate access was not permitted. (2) If the cache tests as live, then we increment the volume's n_accesses count and then recheck the cache liveness, ending the access if it ceased to be live. (3) When we end the access, we decrement the volume's n_accesses and wake up the any waiters if it reaches 0. (4) Whilst the cache is caching, the volume's n_accesses is kept artificially incremented to prevent wakeups from happening. (5) When the cache is taken offline, the state is changed to prevent new accesses, the volume's n_accesses is decremented and we wait for it to become 0. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819594158.215744.8285859817391683254.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906894315.143852.5454793807544710479.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967095028.1823006.9173132503876627466.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021501546.640689.9631510472149608443.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 4 ++++ include/trace/events/fscache.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index c4355b888c91..fbbd8a2afe12 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -53,6 +53,10 @@ extern struct rw_semaphore fscache_addremove_sem; extern struct fscache_cache *fscache_acquire_cache(const char *name); extern void fscache_relinquish_cache(struct fscache_cache *cache); +extern void fscache_end_volume_access(struct fscache_volume *volume, + struct fscache_cookie *cookie, + enum fscache_access_trace why); + extern struct fscache_cookie *fscache_get_cookie(struct fscache_cookie *cookie, enum fscache_cookie_trace where); extern void fscache_put_cookie(struct fscache_cookie *cookie, diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 734966bc49e1..4f40cfa52469 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -43,6 +43,7 @@ enum fscache_volume_trace { fscache_volume_put_relinquish, fscache_volume_see_create_work, fscache_volume_see_hash_wake, + fscache_volume_wait_create_work, }; enum fscache_cookie_trace { @@ -245,6 +246,39 @@ TRACE_EVENT(fscache_access_cache, __entry->n_accesses) ); +TRACE_EVENT(fscache_access_volume, + TP_PROTO(unsigned int volume_debug_id, + unsigned int cookie_debug_id, + int ref, + int n_accesses, + enum fscache_access_trace why), + + TP_ARGS(volume_debug_id, cookie_debug_id, ref, n_accesses, why), + + TP_STRUCT__entry( + __field(unsigned int, volume ) + __field(unsigned int, cookie ) + __field(int, ref ) + __field(int, n_accesses ) + __field(enum fscache_access_trace, why ) + ), + + TP_fast_assign( + __entry->volume = volume_debug_id; + __entry->cookie = cookie_debug_id; + __entry->ref = ref; + __entry->n_accesses = n_accesses; + __entry->why = why; + ), + + TP_printk("V=%08x c=%08x %s r=%d a=%d", + __entry->volume, + __entry->cookie, + __print_symbolic(__entry->why, fscache_access_traces), + __entry->ref, + __entry->n_accesses) + ); + TRACE_EVENT(fscache_acquire, TP_PROTO(struct fscache_cookie *cookie), -- cgit v1.2.3 From a7733fb632722a2f085f9324f14783effe268ed3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:53:34 +0100 Subject: fscache: Implement cookie-level access helpers Add a number of helper functions to manage access to a cookie, pinning the cache object in place for the duration to prevent cache withdrawal from removing it: (1) void fscache_init_access_gate(struct fscache_cookie *cookie); This function initialises the access count when a cache binds to a cookie. An extra ref is taken on the access count to prevent wakeups while the cache is active. We're only interested in the wakeup when a cookie is being withdrawn and we're waiting for it to quiesce - at which point the counter will be decremented before the wait. The FSCACHE_COOKIE_NACC_ELEVATED flag is set on the cookie to keep track of the extra ref in order to handle a race between relinquishment and withdrawal both trying to drop the extra ref. (2) bool fscache_begin_cookie_access(struct fscache_cookie *cookie, enum fscache_access_trace why); This function attempts to begin access upon a cookie, pinning it in place if it's cached. If successful, it returns true and leaves a the access count incremented. (3) void fscache_end_cookie_access(struct fscache_cookie *cookie, enum fscache_access_trace why); This function drops the access count obtained by (2), permitting object withdrawal to take place when it reaches zero. A tracepoint is provided to track changes to the access counter on a cookie. Changes ======= ver #2: - Don't hold n_accesses elevated whilst cache is bound to a cookie, but rather add a flag that prevents the state machine from being queued when n_accesses reaches 0. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819595085.215744.1706073049250505427.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906895313.143852.10141619544149102193.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967095980.1823006.1133648159424418877.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021503063.640689.8870918985269528670.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 2 ++ include/trace/events/fscache.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index fbbd8a2afe12..66624407ba84 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -61,6 +61,8 @@ extern struct fscache_cookie *fscache_get_cookie(struct fscache_cookie *cookie, enum fscache_cookie_trace where); extern void fscache_put_cookie(struct fscache_cookie *cookie, enum fscache_cookie_trace where); +extern void fscache_end_cookie_access(struct fscache_cookie *cookie, + enum fscache_access_trace why); extern void fscache_set_cookie_state(struct fscache_cookie *cookie, enum fscache_cookie_state state); diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 4f40cfa52469..b1a962adfd16 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -279,6 +279,35 @@ TRACE_EVENT(fscache_access_volume, __entry->n_accesses) ); +TRACE_EVENT(fscache_access, + TP_PROTO(unsigned int cookie_debug_id, + int ref, + int n_accesses, + enum fscache_access_trace why), + + TP_ARGS(cookie_debug_id, ref, n_accesses, why), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(int, ref ) + __field(int, n_accesses ) + __field(enum fscache_access_trace, why ) + ), + + TP_fast_assign( + __entry->cookie = cookie_debug_id; + __entry->ref = ref; + __entry->n_accesses = n_accesses; + __entry->why = why; + ), + + TP_printk("c=%08x %s r=%d a=%d", + __entry->cookie, + __print_symbolic(__entry->why, fscache_access_traces), + __entry->ref, + __entry->n_accesses) + ); + TRACE_EVENT(fscache_acquire, TP_PROTO(struct fscache_cookie *cookie), -- cgit v1.2.3 From 2e0c76aee25f33c482abda6224bd87732359354d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:00:26 +0100 Subject: fscache: Implement functions add/remove a cache Implement functions to allow the cache backend to add or remove a cache: (1) Declare a cache to be live: int fscache_add_cache(struct fscache_cache *cache, const struct fscache_cache_ops *ops, void *cache_priv); Take a previously acquired cache cookie, set the operations table and private data and mark the cache open for access. (2) Withdraw a cache from service: void fscache_withdraw_cache(struct fscache_cache *cache); This marks the cache as withdrawn and thus prevents further cache-level and volume-level accesses. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819596022.215744.8799712491432238827.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906896599.143852.17049208999019262884.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967097870.1823006.3470041000971522030.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021505541.640689.1819714759326331054.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 66624407ba84..f78add6e7823 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -33,6 +33,7 @@ enum fscache_cache_state { * Cache cookie. */ struct fscache_cache { + const struct fscache_cache_ops *ops; struct list_head cache_link; /* Link in cache list */ void *cache_priv; /* Private cache data (or NULL) */ refcount_t ref; @@ -44,6 +45,14 @@ struct fscache_cache { char *name; }; +/* + * cache operations + */ +struct fscache_cache_ops { + /* name of cache provider */ + const char *name; +}; + extern struct workqueue_struct *fscache_wq; /* @@ -52,6 +61,10 @@ extern struct workqueue_struct *fscache_wq; extern struct rw_semaphore fscache_addremove_sem; extern struct fscache_cache *fscache_acquire_cache(const char *name); extern void fscache_relinquish_cache(struct fscache_cache *cache); +extern int fscache_add_cache(struct fscache_cache *cache, + const struct fscache_cache_ops *ops, + void *cache_priv); +extern void fscache_withdraw_cache(struct fscache_cache *cache); extern void fscache_end_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, -- cgit v1.2.3 From bfa22da3ed652aa15acd4246fa13a0de6dbe4a59 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:26:17 +0100 Subject: fscache: Provide and use cache methods to lookup/create/free a volume Add cache methods to lookup, create and remove a volume. Looking up or creating the volume requires the cache pinning for access; freeing the volume requires the volume pinning for access. The ->acquire_volume() method is used to ask the cache backend to lookup and, if necessary, create a volume; the ->free_volume() method is used to free the resources for a volume. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819597821.215744.5225318658134989949.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906898645.143852.8537799955945956818.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967099771.1823006.1455197910571061835.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021507345.640689.4073511598838843040.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 7 +++++++ include/trace/events/fscache.h | 11 ++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index f78add6e7823..a10b66ca3544 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -51,6 +51,12 @@ struct fscache_cache { struct fscache_cache_ops { /* name of cache provider */ const char *name; + + /* Acquire a volume */ + void (*acquire_volume)(struct fscache_volume *volume); + + /* Free the cache's data attached to a volume */ + void (*free_volume)(struct fscache_volume *volume); }; extern struct workqueue_struct *fscache_wq; @@ -65,6 +71,7 @@ extern int fscache_add_cache(struct fscache_cache *cache, const struct fscache_cache_ops *ops, void *cache_priv); extern void fscache_withdraw_cache(struct fscache_cache *cache); +extern void fscache_withdraw_volume(struct fscache_volume *volume); extern void fscache_end_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index b1a962adfd16..1d576bd8112e 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -64,8 +64,12 @@ enum fscache_cookie_trace { }; enum fscache_access_trace { + fscache_access_acquire_volume, + fscache_access_acquire_volume_end, fscache_access_cache_pin, fscache_access_cache_unpin, + fscache_access_relinquish_volume, + fscache_access_relinquish_volume_end, fscache_access_unlive, }; @@ -96,7 +100,8 @@ enum fscache_access_trace { EM(fscache_volume_put_hash_collision, "PUT hcoll") \ EM(fscache_volume_put_relinquish, "PUT relnq") \ EM(fscache_volume_see_create_work, "SEE creat") \ - E_(fscache_volume_see_hash_wake, "SEE hwake") + EM(fscache_volume_see_hash_wake, "SEE hwake") \ + E_(fscache_volume_wait_create_work, "WAIT crea") #define fscache_cookie_traces \ EM(fscache_cookie_collision, "*COLLIDE*") \ @@ -115,8 +120,12 @@ enum fscache_access_trace { E_(fscache_cookie_see_work, "- work ") #define fscache_access_traces \ + EM(fscache_access_acquire_volume, "BEGIN acq_vol") \ + EM(fscache_access_acquire_volume_end, "END acq_vol") \ EM(fscache_access_cache_pin, "PIN cache ") \ EM(fscache_access_cache_unpin, "UNPIN cache ") \ + EM(fscache_access_relinquish_volume, "BEGIN rlq_vol") \ + EM(fscache_access_relinquish_volume_end,"END rlq_vol") \ E_(fscache_access_unlive, "END unlive ") /* -- cgit v1.2.3 From 29f18e79fe7c5f8011befeda9be6b220a350f947 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:00:26 +0100 Subject: fscache: Add a function for a cache backend to note an I/O error Add a function to the backend API to note an I/O error in a cache. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819598741.215744.891281275151382095.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906901316.143852.15225412215771586528.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967100721.1823006.16435671567428949398.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021508840.640689.11902836226570620424.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index a10b66ca3544..936ef731bbc7 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -73,6 +73,8 @@ extern int fscache_add_cache(struct fscache_cache *cache, extern void fscache_withdraw_cache(struct fscache_cache *cache); extern void fscache_withdraw_volume(struct fscache_volume *volume); +extern void fscache_io_error(struct fscache_cache *cache); + extern void fscache_end_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); -- cgit v1.2.3 From 5d00e426f95e7ea036fec2a0aceb3f71d6dbdf92 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:53:34 +0100 Subject: fscache: Implement simple cookie state machine Implement a very simple cookie state machine to handle lookup, invalidation, withdrawal, relinquishment and, to be added later, commit on LRU discard. Three cache methods are provided: ->lookup_cookie() to look up and, if necessary, create a data storage object; ->withdraw_cookie() to free the resources associated with that object and potentially delete it; and ->prepare_to_write(), to do prepare for changes to the cached data to be modified locally. Changes ======= ver #3: - Fix a race between LRU discard and relinquishment whereby the former would override the latter and thus the latter would never happen[1]. ver #2: - Don't hold n_accesses elevated whilst cache is bound to a cookie, but rather add a flag that prevents the state machine from being queued when n_accesses reaches 0. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/599331.1639410068@warthog.procyon.org.uk/ [1] Link: https://lore.kernel.org/r/163819599657.215744.15799615296912341745.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906903925.143852.1805855338154353867.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967105456.1823006.14730395299835841776.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021510706.640689.7961423370243272583.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 27 +++++++++++++++++++++++++-- include/trace/events/fscache.h | 4 ++++ 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 936ef731bbc7..ae6a75976450 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -57,6 +57,15 @@ struct fscache_cache_ops { /* Free the cache's data attached to a volume */ void (*free_volume)(struct fscache_volume *volume); + + /* Look up a cookie in the cache */ + bool (*lookup_cookie)(struct fscache_cookie *cookie); + + /* Withdraw an object without any cookie access counts held */ + void (*withdraw_cookie)(struct fscache_cookie *cookie); + + /* Prepare to write to a live cache object */ + void (*prepare_to_write)(struct fscache_cookie *cookie); }; extern struct workqueue_struct *fscache_wq; @@ -72,6 +81,7 @@ extern int fscache_add_cache(struct fscache_cache *cache, void *cache_priv); extern void fscache_withdraw_cache(struct fscache_cache *cache); extern void fscache_withdraw_volume(struct fscache_volume *volume); +extern void fscache_withdraw_cookie(struct fscache_cookie *cookie); extern void fscache_io_error(struct fscache_cache *cache); @@ -85,8 +95,21 @@ extern void fscache_put_cookie(struct fscache_cookie *cookie, enum fscache_cookie_trace where); extern void fscache_end_cookie_access(struct fscache_cookie *cookie, enum fscache_access_trace why); -extern void fscache_set_cookie_state(struct fscache_cookie *cookie, - enum fscache_cookie_state state); +extern void fscache_cookie_lookup_negative(struct fscache_cookie *cookie); +extern void fscache_caching_failed(struct fscache_cookie *cookie); + +/** + * fscache_cookie_state - Read the state of a cookie + * @cookie: The cookie to query + * + * Get the state of a cookie, imposing an ordering between the cookie contents + * and the state value. Paired with fscache_set_cookie_state(). + */ +static inline +enum fscache_cookie_state fscache_cookie_state(struct fscache_cookie *cookie) +{ + return smp_load_acquire(&cookie->state); +} /** * fscache_get_key - Get a pointer to the cookie key diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 1d576bd8112e..030c97bb9c8b 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -68,6 +68,8 @@ enum fscache_access_trace { fscache_access_acquire_volume_end, fscache_access_cache_pin, fscache_access_cache_unpin, + fscache_access_lookup_cookie_end, + fscache_access_lookup_cookie_end_failed, fscache_access_relinquish_volume, fscache_access_relinquish_volume_end, fscache_access_unlive, @@ -124,6 +126,8 @@ enum fscache_access_trace { EM(fscache_access_acquire_volume_end, "END acq_vol") \ EM(fscache_access_cache_pin, "PIN cache ") \ EM(fscache_access_cache_unpin, "UNPIN cache ") \ + EM(fscache_access_lookup_cookie_end, "END lookup ") \ + EM(fscache_access_lookup_cookie_end_failed,"END lookupf") \ EM(fscache_access_relinquish_volume, "BEGIN rlq_vol") \ EM(fscache_access_relinquish_volume_end,"END rlq_vol") \ E_(fscache_access_unlive, "END unlive ") -- cgit v1.2.3 From 12bb21a29c19aae50cfad4e2bb5c943108f34a7d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:53:34 +0100 Subject: fscache: Implement cookie user counting and resource pinning Provide a pair of functions to count the number of users of a cookie (open files, writeback, invalidation, resizing, reads, writes), to obtain and pin resources for the cookie and to prevent culling for the whilst there are users. The first function marks a cookie as being in use: void fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify); The caller should indicate the cookie to use and whether or not the caller is in a context that may modify the cookie (e.g. a file open O_RDWR). If the cookie is not already resourced, fscache will ask the cache backend in the background to do whatever it needs to look up, create or otherwise obtain the resources necessary to access data. This is pinned to the cookie and may not be culled, though it may be withdrawn if the cache as a whole is withdrawn. The second function removes the in-use mark from a cookie and, optionally, updates the coherency data: void fscache_unuse_cookie(struct fscache_cookie *cookie, const void *aux_data, const loff_t *object_size); If non-NULL, the aux_data buffer and/or the object_size will be saved into the cookie and will be set on the backing store when the object is committed. If this removes the last usage on a cookie, the cookie is placed onto an LRU list from which it will be removed and closed after a couple of seconds if it doesn't get reused. This prevents resource overload in the cache - in particular it prevents it from holding too many files open. Changes ======= ver #2: - Fix fscache_unuse_cookie() to use atomic_dec_and_lock() to avoid a potential race if the cookie gets reused before it completes the unusement. - Added missing transition to LRU_DISCARDING state. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819600612.215744.13678350304176542741.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906907567.143852.16979631199380722019.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967106467.1823006.6790864931048582667.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021511674.640689.10084988363699111860.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache.h | 82 +++++++++++++++++++++++++++++++++++++++++- include/trace/events/fscache.h | 12 +++++++ 2 files changed, 93 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 4450d17c11e8..e6c321e5bf73 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -22,12 +22,14 @@ #define fscache_available() (1) #define fscache_volume_valid(volume) (volume) #define fscache_cookie_valid(cookie) (cookie) -#define fscache_cookie_enabled(cookie) (cookie) +#define fscache_resources_valid(cres) ((cres)->cache_priv) +#define fscache_cookie_enabled(cookie) (cookie && !test_bit(FSCACHE_COOKIE_DISABLED, &cookie->flags)) #else #define __fscache_available (0) #define fscache_available() (0) #define fscache_volume_valid(volume) (0) #define fscache_cookie_valid(cookie) (0) +#define fscache_resources_valid(cres) (false) #define fscache_cookie_enabled(cookie) (0) #endif @@ -46,6 +48,7 @@ enum fscache_cookie_state { FSCACHE_COOKIE_STATE_CREATING, /* The cache object is being created */ FSCACHE_COOKIE_STATE_ACTIVE, /* The cache is active, readable and writable */ FSCACHE_COOKIE_STATE_FAILED, /* The cache failed, withdraw to clear */ + FSCACHE_COOKIE_STATE_LRU_DISCARDING, /* The cookie is being discarded by the LRU */ FSCACHE_COOKIE_STATE_WITHDRAWING, /* The cookie is being withdrawn */ FSCACHE_COOKIE_STATE_RELINQUISHING, /* The cookie is being relinquished */ FSCACHE_COOKIE_STATE_DROPPED, /* The cookie has been dropped */ @@ -147,6 +150,8 @@ extern struct fscache_cookie *__fscache_acquire_cookie( const void *, size_t, const void *, size_t, loff_t); +extern void __fscache_use_cookie(struct fscache_cookie *, bool); +extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *); extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); /** @@ -228,6 +233,39 @@ struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume, object_size); } +/** + * fscache_use_cookie - Request usage of cookie attached to an object + * @object: Object description + * @will_modify: If cache is expected to be modified locally + * + * Request usage of the cookie attached to an object. The caller should tell + * the cache if the object's contents are about to be modified locally and then + * the cache can apply the policy that has been set to handle this case. + */ +static inline void fscache_use_cookie(struct fscache_cookie *cookie, + bool will_modify) +{ + if (fscache_cookie_valid(cookie)) + __fscache_use_cookie(cookie, will_modify); +} + +/** + * fscache_unuse_cookie - Cease usage of cookie attached to an object + * @object: Object description + * @aux_data: Updated auxiliary data (or NULL) + * @object_size: Revised size of the object (or NULL) + * + * Cease usage of the cookie attached to an object. When the users count + * reaches zero then the cookie relinquishment will be permitted to proceed. + */ +static inline void fscache_unuse_cookie(struct fscache_cookie *cookie, + const void *aux_data, + const loff_t *object_size) +{ + if (fscache_cookie_valid(cookie)) + __fscache_unuse_cookie(cookie, aux_data, object_size); +} + /** * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding * it @@ -247,4 +285,46 @@ void fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) __fscache_relinquish_cookie(cookie, retire); } +/* + * Find the auxiliary data on a cookie. + */ +static inline void *fscache_get_aux(struct fscache_cookie *cookie) +{ + if (cookie->aux_len <= sizeof(cookie->inline_aux)) + return cookie->inline_aux; + else + return cookie->aux; +} + +/* + * Update the auxiliary data on a cookie. + */ +static inline +void fscache_update_aux(struct fscache_cookie *cookie, + const void *aux_data, const loff_t *object_size) +{ + void *p = fscache_get_aux(cookie); + + if (aux_data && p) + memcpy(p, aux_data, cookie->aux_len); + if (object_size) + cookie->object_size = *object_size; +} + +#ifdef CONFIG_FSCACHE_STATS +extern atomic_t fscache_n_updates; +#endif + +static inline +void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, + const loff_t *object_size) +{ +#ifdef CONFIG_FSCACHE_STATS + atomic_inc(&fscache_n_updates); +#endif + fscache_update_aux(cookie, aux_data, object_size); + smp_wmb(); + set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags); +} + #endif /* _LINUX_FSCACHE_H */ diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 030c97bb9c8b..b0409b1fad23 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -51,13 +51,18 @@ enum fscache_cookie_trace { fscache_cookie_discard, fscache_cookie_get_end_access, fscache_cookie_get_hash_collision, + fscache_cookie_get_lru, + fscache_cookie_get_use_work, fscache_cookie_new_acquire, fscache_cookie_put_hash_collision, + fscache_cookie_put_lru, fscache_cookie_put_over_queued, fscache_cookie_put_relinquish, fscache_cookie_put_withdrawn, fscache_cookie_put_work, fscache_cookie_see_active, + fscache_cookie_see_lru_discard, + fscache_cookie_see_lru_do_one, fscache_cookie_see_relinquish, fscache_cookie_see_withdraw, fscache_cookie_see_work, @@ -68,6 +73,7 @@ enum fscache_access_trace { fscache_access_acquire_volume_end, fscache_access_cache_pin, fscache_access_cache_unpin, + fscache_access_lookup_cookie, fscache_access_lookup_cookie_end, fscache_access_lookup_cookie_end_failed, fscache_access_relinquish_volume, @@ -110,13 +116,18 @@ enum fscache_access_trace { EM(fscache_cookie_discard, "DISCARD ") \ EM(fscache_cookie_get_hash_collision, "GET hcoll") \ EM(fscache_cookie_get_end_access, "GQ endac") \ + EM(fscache_cookie_get_lru, "GET lru ") \ + EM(fscache_cookie_get_use_work, "GQ use ") \ EM(fscache_cookie_new_acquire, "NEW acq ") \ EM(fscache_cookie_put_hash_collision, "PUT hcoll") \ + EM(fscache_cookie_put_lru, "PUT lru ") \ EM(fscache_cookie_put_over_queued, "PQ overq") \ EM(fscache_cookie_put_relinquish, "PUT relnq") \ EM(fscache_cookie_put_withdrawn, "PUT wthdn") \ EM(fscache_cookie_put_work, "PQ work ") \ EM(fscache_cookie_see_active, "- activ") \ + EM(fscache_cookie_see_lru_discard, "- x-lru") \ + EM(fscache_cookie_see_lru_do_one, "- lrudo") \ EM(fscache_cookie_see_relinquish, "- x-rlq") \ EM(fscache_cookie_see_withdraw, "- x-wth") \ E_(fscache_cookie_see_work, "- work ") @@ -126,6 +137,7 @@ enum fscache_access_trace { EM(fscache_access_acquire_volume_end, "END acq_vol") \ EM(fscache_access_cache_pin, "PIN cache ") \ EM(fscache_access_cache_unpin, "UNPIN cache ") \ + EM(fscache_access_lookup_cookie, "BEGIN lookup ") \ EM(fscache_access_lookup_cookie_end, "END lookup ") \ EM(fscache_access_lookup_cookie_end_failed,"END lookupf") \ EM(fscache_access_relinquish_volume, "BEGIN rlq_vol") \ -- cgit v1.2.3 From d24af13e2e2358a602740c7817ea90da43d3e740 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 15:53:34 +0100 Subject: fscache: Implement cookie invalidation Add a function to invalidate the cache behind a cookie: void fscache_invalidate(struct fscache_cookie *cookie, const void *aux_data, loff_t size, unsigned int flags) This causes any cached data for the specified cookie to be discarded. If the cookie is marked as being in use, a new cache object will be created if possible and future I/O will use that instead. In-flight I/O should be abandoned (writes) or reconsidered (reads). Each time it is called cookie->inval_counter is incremented and this can be used to detect invalidation at the end of an I/O operation. The coherency data attached to the cookie can be updated and the cookie size should be reset. One flag is available, FSCACHE_INVAL_DIO_WRITE, which should be used to indicate invalidation due to a DIO write on a file. This will temporarily disable caching for this cookie. Changes ======= ver #2: - Should only change to inval state if can get access to cache. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819602231.215744.11206598147269491575.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906909707.143852.18056070560477964891.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967107447.1823006.5945029409592119962.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021512640.640689.11418616313147754172.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 4 ++++ include/linux/fscache.h | 31 +++++++++++++++++++++++++++++++ include/linux/netfs.h | 1 + include/trace/events/fscache.h | 25 +++++++++++++++++++++++++ 4 files changed, 61 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ae6a75976450..1ad56bfd9d72 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -64,6 +64,9 @@ struct fscache_cache_ops { /* Withdraw an object without any cookie access counts held */ void (*withdraw_cookie)(struct fscache_cookie *cookie); + /* Invalidate an object */ + bool (*invalidate_cookie)(struct fscache_cookie *cookie); + /* Prepare to write to a live cache object */ void (*prepare_to_write)(struct fscache_cookie *cookie); }; @@ -96,6 +99,7 @@ extern void fscache_put_cookie(struct fscache_cookie *cookie, extern void fscache_end_cookie_access(struct fscache_cookie *cookie, enum fscache_access_trace why); extern void fscache_cookie_lookup_negative(struct fscache_cookie *cookie); +extern void fscache_resume_after_invalidation(struct fscache_cookie *cookie); extern void fscache_caching_failed(struct fscache_cookie *cookie); /** diff --git a/include/linux/fscache.h b/include/linux/fscache.h index e6c321e5bf73..0f36d1fac237 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -39,6 +39,8 @@ struct fscache_cookie; #define FSCACHE_ADV_WRITE_CACHE 0x00 /* Do cache if written to locally */ #define FSCACHE_ADV_WRITE_NOCACHE 0x02 /* Don't cache if written to locally */ +#define FSCACHE_INVAL_DIO_WRITE 0x01 /* Invalidate due to DIO write */ + /* * Data object state. */ @@ -47,6 +49,7 @@ enum fscache_cookie_state { FSCACHE_COOKIE_STATE_LOOKING_UP, /* The cache object is being looked up */ FSCACHE_COOKIE_STATE_CREATING, /* The cache object is being created */ FSCACHE_COOKIE_STATE_ACTIVE, /* The cache is active, readable and writable */ + FSCACHE_COOKIE_STATE_INVALIDATING, /* The cache is being invalidated */ FSCACHE_COOKIE_STATE_FAILED, /* The cache failed, withdraw to clear */ FSCACHE_COOKIE_STATE_LRU_DISCARDING, /* The cookie is being discarded by the LRU */ FSCACHE_COOKIE_STATE_WITHDRAWING, /* The cookie is being withdrawn */ @@ -153,6 +156,7 @@ extern struct fscache_cookie *__fscache_acquire_cookie( extern void __fscache_use_cookie(struct fscache_cookie *, bool); extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *); extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); +extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int); /** * fscache_acquire_volume - Register a volume as desiring caching services @@ -327,4 +331,31 @@ void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags); } +/** + * fscache_invalidate - Notify cache that an object needs invalidation + * @cookie: The cookie representing the cache object + * @aux_data: The updated auxiliary data for the cookie (may be NULL) + * @size: The revised size of the object. + * @flags: Invalidation flags (FSCACHE_INVAL_*) + * + * Notify the cache that an object is needs to be invalidated and that it + * should abort any retrievals or stores it is doing on the cache. This + * increments inval_counter on the cookie which can be used by the caller to + * reconsider I/O requests as they complete. + * + * If @flags has FSCACHE_INVAL_DIO_WRITE set, this indicates that this is due + * to a direct I/O write and will cause caching to be disabled on this cookie + * until it is completely unused. + * + * See Documentation/filesystems/caching/netfs-api.rst for a complete + * description. + */ +static inline +void fscache_invalidate(struct fscache_cookie *cookie, + const void *aux_data, loff_t size, unsigned int flags) +{ + if (fscache_cookie_enabled(cookie)) + __fscache_invalidate(cookie, aux_data, size, flags); +} + #endif /* _LINUX_FSCACHE_H */ diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 1ea22fc48818..5a46fde65759 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -124,6 +124,7 @@ struct netfs_cache_resources { void *cache_priv; void *cache_priv2; unsigned int debug_id; /* Cookie debug ID */ + unsigned int inval_counter; /* object->inval_counter at begin_op */ }; /* diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index b0409b1fad23..294792881434 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -51,6 +51,7 @@ enum fscache_cookie_trace { fscache_cookie_discard, fscache_cookie_get_end_access, fscache_cookie_get_hash_collision, + fscache_cookie_get_inval_work, fscache_cookie_get_lru, fscache_cookie_get_use_work, fscache_cookie_new_acquire, @@ -73,6 +74,8 @@ enum fscache_access_trace { fscache_access_acquire_volume_end, fscache_access_cache_pin, fscache_access_cache_unpin, + fscache_access_invalidate_cookie, + fscache_access_invalidate_cookie_end, fscache_access_lookup_cookie, fscache_access_lookup_cookie_end, fscache_access_lookup_cookie_end_failed, @@ -116,6 +119,7 @@ enum fscache_access_trace { EM(fscache_cookie_discard, "DISCARD ") \ EM(fscache_cookie_get_hash_collision, "GET hcoll") \ EM(fscache_cookie_get_end_access, "GQ endac") \ + EM(fscache_cookie_get_inval_work, "GQ inval") \ EM(fscache_cookie_get_lru, "GET lru ") \ EM(fscache_cookie_get_use_work, "GQ use ") \ EM(fscache_cookie_new_acquire, "NEW acq ") \ @@ -137,6 +141,8 @@ enum fscache_access_trace { EM(fscache_access_acquire_volume_end, "END acq_vol") \ EM(fscache_access_cache_pin, "PIN cache ") \ EM(fscache_access_cache_unpin, "UNPIN cache ") \ + EM(fscache_access_invalidate_cookie, "BEGIN inval ") \ + EM(fscache_access_invalidate_cookie_end,"END inval ") \ EM(fscache_access_lookup_cookie, "BEGIN lookup ") \ EM(fscache_access_lookup_cookie_end, "END lookup ") \ EM(fscache_access_lookup_cookie_end_failed,"END lookupf") \ @@ -385,6 +391,25 @@ TRACE_EVENT(fscache_relinquish, __entry->n_active, __entry->flags, __entry->retire) ); +TRACE_EVENT(fscache_invalidate, + TP_PROTO(struct fscache_cookie *cookie, loff_t new_size), + + TP_ARGS(cookie, new_size), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(loff_t, new_size ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + __entry->new_size = new_size; + ), + + TP_printk("c=%08x sz=%llx", + __entry->cookie, __entry->new_size) + ); + #endif /* _TRACE_FSCACHE_H */ /* This part must be outside protection */ -- cgit v1.2.3 From d64f4554dd177c5891c02424a8d9e80590b55b35 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 14:06:34 +0100 Subject: fscache: Provide a means to begin an operation Provide a function to begin a read operation: int fscache_begin_read_operation( struct netfs_cache_resources *cres, struct fscache_cookie *cookie) This is primarily intended to be called by network filesystems on behalf of netfslib, but may also be called to use the I/O access functions directly. It attaches the resources required by the cache to cres struct from the supplied cookie. This holds access to the cache behind the cookie for the duration of the operation and forces cache withdrawal and cookie invalidation to perform synchronisation on the operation. cres->inval_counter is set from the cookie at this point so that it can be compared at the end of the operation. Note that this does not guarantee that the cache state is fully set up and able to perform I/O immediately; looking up and creation may be left in progress in the background. The operations intended to be called by the network filesystem, such as reading and writing, are expected to wait for the cookie to move to the correct state. This will, however, potentially sleep, waiting for a certain minimum state to be set or for operations such as invalidate to advance far enough that I/O can resume. Also provide a function for the cache to call to wait for the cache object to get to a state where it can be used for certain things: bool fscache_wait_for_operation(struct netfs_cache_resources *cres, enum fscache_want_stage stage); This looks at the cache resources provided by the begin function and waits for them to get to an appropriate stage. There's a choice of wanting just some parameters (FSCACHE_WANT_PARAM) or the ability to do I/O (FSCACHE_WANT_READ or FSCACHE_WANT_WRITE). Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819603692.215744.146724961588817028.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906910672.143852.13856103384424986357.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967110245.1823006.2239170567540431836.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021513617.640689.16627329360866150606.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 11 ++++++++++ include/linux/fscache.h | 49 ++++++++++++++++++++++++++++++++++++++++++ include/trace/events/fscache.h | 6 ++++++ 3 files changed, 66 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 1ad56bfd9d72..566497cf5f13 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -67,6 +67,10 @@ struct fscache_cache_ops { /* Invalidate an object */ bool (*invalidate_cookie)(struct fscache_cookie *cookie); + /* Begin an operation for the netfs lib */ + bool (*begin_operation)(struct netfs_cache_resources *cres, + enum fscache_want_state want_state); + /* Prepare to write to a live cache object */ void (*prepare_to_write)(struct fscache_cookie *cookie); }; @@ -101,6 +105,8 @@ extern void fscache_end_cookie_access(struct fscache_cookie *cookie, extern void fscache_cookie_lookup_negative(struct fscache_cookie *cookie); extern void fscache_resume_after_invalidation(struct fscache_cookie *cookie); extern void fscache_caching_failed(struct fscache_cookie *cookie); +extern bool fscache_wait_for_operation(struct netfs_cache_resources *cred, + enum fscache_want_state state); /** * fscache_cookie_state - Read the state of a cookie @@ -129,4 +135,9 @@ static inline void *fscache_get_key(struct fscache_cookie *cookie) return cookie->key; } +static inline struct fscache_cookie *fscache_cres_cookie(struct netfs_cache_resources *cres) +{ + return cres->cache_priv; +} + #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 0f36d1fac237..7cdc63c4fe35 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -41,6 +41,12 @@ struct fscache_cookie; #define FSCACHE_INVAL_DIO_WRITE 0x01 /* Invalidate due to DIO write */ +enum fscache_want_state { + FSCACHE_WANT_PARAMS, + FSCACHE_WANT_WRITE, + FSCACHE_WANT_READ, +}; + /* * Data object state. */ @@ -157,6 +163,7 @@ extern void __fscache_use_cookie(struct fscache_cookie *, bool); extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *); extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int); +extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); /** * fscache_acquire_volume - Register a volume as desiring caching services @@ -358,4 +365,46 @@ void fscache_invalidate(struct fscache_cookie *cookie, __fscache_invalidate(cookie, aux_data, size, flags); } +/** + * fscache_operation_valid - Return true if operations resources are usable + * @cres: The resources to check. + * + * Returns a pointer to the operations table if usable or NULL if not. + */ +static inline +const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_resources *cres) +{ + return fscache_resources_valid(cres) ? cres->ops : NULL; +} + +/** + * fscache_begin_read_operation - Begin a read operation for the netfs lib + * @cres: The cache resources for the read being performed + * @cookie: The cookie representing the cache object + * + * Begin a read operation on behalf of the netfs helper library. @cres + * indicates the cache resources to which the operation state should be + * attached; @cookie indicates the cache object that will be accessed. + * + * This is intended to be called from the ->begin_cache_operation() netfs lib + * operation as implemented by the network filesystem. + * + * @cres->inval_counter is set from @cookie->inval_counter for comparison at + * the end of the operation. This allows invalidation during the operation to + * be detected by the caller. + * + * Returns: + * * 0 - Success + * * -ENOBUFS - No caching available + * * Other error code from the cache, such as -ENOMEM. + */ +static inline +int fscache_begin_read_operation(struct netfs_cache_resources *cres, + struct fscache_cookie *cookie) +{ + if (fscache_cookie_enabled(cookie)) + return __fscache_begin_read_operation(cres, cookie); + return -ENOBUFS; +} + #endif /* _LINUX_FSCACHE_H */ diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 294792881434..9f78c903b00a 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -76,6 +76,9 @@ enum fscache_access_trace { fscache_access_cache_unpin, fscache_access_invalidate_cookie, fscache_access_invalidate_cookie_end, + fscache_access_io_not_live, + fscache_access_io_read, + fscache_access_io_wait, fscache_access_lookup_cookie, fscache_access_lookup_cookie_end, fscache_access_lookup_cookie_end_failed, @@ -143,6 +146,9 @@ enum fscache_access_trace { EM(fscache_access_cache_unpin, "UNPIN cache ") \ EM(fscache_access_invalidate_cookie, "BEGIN inval ") \ EM(fscache_access_invalidate_cookie_end,"END inval ") \ + EM(fscache_access_io_not_live, "END io_notl") \ + EM(fscache_access_io_read, "BEGIN io_read") \ + EM(fscache_access_io_wait, "WAIT io ") \ EM(fscache_access_lookup_cookie, "BEGIN lookup ") \ EM(fscache_access_lookup_cookie_end, "END lookup ") \ EM(fscache_access_lookup_cookie_end_failed,"END lookupf") \ -- cgit v1.2.3 From cdf262f29488e6c3432911ec487ea41918fcbcd7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 11 Nov 2021 23:14:29 +0000 Subject: fscache: Count data storage objects in a cache Count the data storage objects that are currently allocated in a cache. This is used to pin certain cache structures until cache withdrawal is complete. Three helpers are provided to manage and make use of the count: (1) void fscache_count_object(struct fscache_cache *cache); This should be called by the cache backend to note that an object has been allocated and attached to the cache. (2) void fscache_uncount_object(struct fscache_cache *cache); This should be called by the backend to note that an object has been destroyed. This sends a wakeup event that allows cache withdrawal to proceed if it was waiting for that object. (3) void fscache_wait_for_objects(struct fscache_cache *cache); This can be used by the backend to wait for all outstanding cache object to be destroyed. Each cache's counter is displayed as part of /proc/fs/fscache/caches. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819608594.215744.1812706538117388252.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906911646.143852.168184059935530127.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967111846.1823006.9868154941573671255.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021516219.640689.4934796654308958158.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 566497cf5f13..337335d7a5e2 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -76,6 +76,7 @@ struct fscache_cache_ops { }; extern struct workqueue_struct *fscache_wq; +extern wait_queue_head_t fscache_clearance_waiters; /* * out-of-line cache backend functions @@ -140,4 +141,42 @@ static inline struct fscache_cookie *fscache_cres_cookie(struct netfs_cache_reso return cres->cache_priv; } +/** + * fscache_count_object - Tell fscache that an object has been added + * @cache: The cache to account to + * + * Tell fscache that an object has been added to the cache. This prevents the + * cache from tearing down the cache structure until the object is uncounted. + */ +static inline void fscache_count_object(struct fscache_cache *cache) +{ + atomic_inc(&cache->object_count); +} + +/** + * fscache_uncount_object - Tell fscache that an object has been removed + * @cache: The cache to account to + * + * Tell fscache that an object has been removed from the cache and will no + * longer be accessed. After this point, the cache cookie may be destroyed. + */ +static inline void fscache_uncount_object(struct fscache_cache *cache) +{ + if (atomic_dec_and_test(&cache->object_count)) + wake_up_all(&fscache_clearance_waiters); +} + +/** + * fscache_wait_for_objects - Wait for all objects to be withdrawn + * @cache: The cache to query + * + * Wait for all extant objects in a cache to finish being withdrawn + * and go away. + */ +static inline void fscache_wait_for_objects(struct fscache_cache *cache) +{ + wait_event(fscache_clearance_waiters, + atomic_read(&cache->object_count) == 0); +} + #endif /* _LINUX_FSCACHE_CACHE_H */ -- cgit v1.2.3 From 8e7a867bb7309fbf47e8c2a68798b919fc02523f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 23:06:16 +0100 Subject: fscache: Provide read/write stat counters for the cache Provide read/write stat counters for the cache backend to use. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819609532.215744.10821082637727410554.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906912598.143852.12960327989649429069.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967113830.1823006.3222957649202368162.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021517502.640689.6077928311710357342.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 337335d7a5e2..796c8b5c5305 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -179,4 +179,14 @@ static inline void fscache_wait_for_objects(struct fscache_cache *cache) atomic_read(&cache->object_count) == 0); } +#ifdef CONFIG_FSCACHE_STATS +extern atomic_t fscache_n_read; +extern atomic_t fscache_n_write; +#define fscache_count_read() atomic_inc(&fscache_n_read) +#define fscache_count_write() atomic_inc(&fscache_n_write) +#else +#define fscache_count_read() do {} while(0) +#define fscache_count_write() do {} while(0) +#endif + #endif /* _LINUX_FSCACHE_CACHE_H */ -- cgit v1.2.3 From ed1235eb78a7421cd0ac2ad09e931f8f07ccdc7c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 23:10:46 +0100 Subject: fscache: Provide a function to let the netfs update its coherency data Provide a function to let the netfs update its coherency data: void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, const loff_t *object_size); This will update the auxiliary data and/or the size of the object attached to a cookie if either pointer is not-NULL and flag that the disk needs to be updated. Note that fscache_unuse_cookie() also allows this to be done. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819610438.215744.4223265964131424954.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906913530.143852.18150303220217653820.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967117795.1823006.7493373142653442595.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021518440.640689.6369952464473039268.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 7cdc63c4fe35..fc77648c8af6 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -338,6 +338,28 @@ void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags); } +/** + * fscache_update_cookie - Request that a cache object be updated + * @cookie: The cookie representing the cache object + * @aux_data: The updated auxiliary data for the cookie (may be NULL) + * @object_size: The current size of the object (may be NULL) + * + * Request an update of the index data for the cache object associated with the + * cookie. The auxiliary data on the cookie will be updated first if @aux_data + * is set and the object size will be updated and the object possibly trimmed + * if @object_size is set. + * + * See Documentation/filesystems/caching/netfs-api.rst for a complete + * description. + */ +static inline +void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, + const loff_t *object_size) +{ + if (fscache_cookie_enabled(cookie)) + __fscache_update_cookie(cookie, aux_data, object_size); +} + /** * fscache_invalidate - Notify cache that an object needs invalidation * @cookie: The cookie representing the cache object -- cgit v1.2.3 From 3a11b3a86366ccbf0818b088ffecadf8b2d61177 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Sep 2021 09:47:45 +0100 Subject: netfs: Pass more information on how to deal with a hole in the cache Pass more information to the cache on how to deal with a hole if it encounters one when trying to read from the cache. Three options are provided: (1) NETFS_READ_HOLE_IGNORE. Read the hole along with the data, assuming it to be a punched-out extent by the backing filesystem. (2) NETFS_READ_HOLE_CLEAR. If there's a hole, erase the requested region of the cache and clear the read buffer. (3) NETFS_READ_HOLE_FAIL. Fail the read if a hole is detected. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819612321.215744.9738308885948264476.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906914460.143852.6284247083607910189.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967119923.1823006.15637375885194297582.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021519762.640689.16994364383313159319.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/netfs.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5a46fde65759..b46c39d98bbd 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -196,6 +196,15 @@ struct netfs_read_request_ops { void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; +/* + * How to handle reading from a hole. + */ +enum netfs_read_from_hole { + NETFS_READ_HOLE_IGNORE, + NETFS_READ_HOLE_CLEAR, + NETFS_READ_HOLE_FAIL, +}; + /* * Table of operations for access to a cache. This is obtained by * rreq->ops->begin_cache_operation(). @@ -208,7 +217,7 @@ struct netfs_cache_ops { int (*read)(struct netfs_cache_resources *cres, loff_t start_pos, struct iov_iter *iter, - bool seek_data, + enum netfs_read_from_hole read_hole, netfs_io_terminated_t term_func, void *term_func_priv); -- cgit v1.2.3 From 9af1c6c3089b294ffa240e0fbba356666698b6d0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 14:06:34 +0100 Subject: fscache: Implement raw I/O interface Provide a pair of functions to perform raw I/O on the cache. The first function allows an arbitrary asynchronous direct-IO read to be made against a cache object, though the read should be aligned and sized appropriately for the backing device: int fscache_read(struct netfs_cache_resources *cres, loff_t start_pos, struct iov_iter *iter, enum netfs_read_from_hole read_hole, netfs_io_terminated_t term_func, void *term_func_priv); The cache resources must have been previously initialised by fscache_begin_read_operation(). A read operation is sent to the backing filesystem, starting at start_pos within the file. The size of the read is specified by the iterator, as is the location of the output buffer. If there is a hole in the data it can be ignored and left to the backing filesystem to deal with (NETFS_READ_HOLE_IGNORE), a hole at the beginning can be skipped over and the buffer padded with zeros (NETFS_READ_HOLE_CLEAR) or -ENODATA can be given (NETFS_READ_HOLE_FAIL). If term_func is not NULL, the operation may be performed asynchronously. Upon completion, successful or otherwise, (*term_func)() will be called and passed term_func_priv, along with an error or the amount of data transferred. If the op is run asynchronously, fscache_read() will return -EIOCBQUEUED. The second function allows an arbitrary asynchronous direct-IO write to be made against a cache object, though the write should be aligned and sized appropriately for the backing device: int fscache_write(struct netfs_cache_resources *cres, loff_t start_pos, struct iov_iter *iter, netfs_io_terminated_t term_func, void *term_func_priv); This works in very similar way to fscache_read(), except that there's no need to deal with holes (they're just overwritten). The caller is responsible for preventing concurrent overlapping writes. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819613224.215744.7877577215582621254.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906915386.143852.16936177636106480724.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967122632.1823006.7487049517698562172.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021521420.640689.12747258780542678309.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache.h | 74 ++++++++++++++++++++++++++++++++++++++++++ include/trace/events/fscache.h | 2 ++ 2 files changed, 76 insertions(+) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index fc77648c8af6..ae753cae0fdd 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -429,4 +429,78 @@ int fscache_begin_read_operation(struct netfs_cache_resources *cres, return -ENOBUFS; } +/** + * fscache_read - Start a read from the cache. + * @cres: The cache resources to use + * @start_pos: The beginning file offset in the cache file + * @iter: The buffer to fill - and also the length + * @read_hole: How to handle a hole in the data. + * @term_func: The function to call upon completion + * @term_func_priv: The private data for @term_func + * + * Start a read from the cache. @cres indicates the cache object to read from + * and must be obtained by a call to fscache_begin_operation() beforehand. + * + * The data is read into the iterator, @iter, and that also indicates the size + * of the operation. @start_pos is the start position in the file, though if + * @seek_data is set appropriately, the cache can use SEEK_DATA to find the + * next piece of data, writing zeros for the hole into the iterator. + * + * Upon termination of the operation, @term_func will be called and supplied + * with @term_func_priv plus the amount of data written, if successful, or the + * error code otherwise. + * + * @read_hole indicates how a partially populated region in the cache should be + * handled. It can be one of a number of settings: + * + * NETFS_READ_HOLE_IGNORE - Just try to read (may return a short read). + * + * NETFS_READ_HOLE_CLEAR - Seek for data, clearing the part of the buffer + * skipped over, then do as for IGNORE. + * + * NETFS_READ_HOLE_FAIL - Give ENODATA if we encounter a hole. + */ +static inline +int fscache_read(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + enum netfs_read_from_hole read_hole, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + const struct netfs_cache_ops *ops = fscache_operation_valid(cres); + return ops->read(cres, start_pos, iter, read_hole, + term_func, term_func_priv); +} + +/** + * fscache_write - Start a write to the cache. + * @cres: The cache resources to use + * @start_pos: The beginning file offset in the cache file + * @iter: The data to write - and also the length + * @term_func: The function to call upon completion + * @term_func_priv: The private data for @term_func + * + * Start a write to the cache. @cres indicates the cache object to write to and + * must be obtained by a call to fscache_begin_operation() beforehand. + * + * The data to be written is obtained from the iterator, @iter, and that also + * indicates the size of the operation. @start_pos is the start position in + * the file. + * + * Upon termination of the operation, @term_func will be called and supplied + * with @term_func_priv plus the amount of data written, if successful, or the + * error code otherwise. + */ +static inline +int fscache_write(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + const struct netfs_cache_ops *ops = fscache_operation_valid(cres); + return ops->write(cres, start_pos, iter, term_func, term_func_priv); +} + #endif /* _LINUX_FSCACHE_H */ diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 9f78c903b00a..2459d75659cf 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -79,6 +79,7 @@ enum fscache_access_trace { fscache_access_io_not_live, fscache_access_io_read, fscache_access_io_wait, + fscache_access_io_write, fscache_access_lookup_cookie, fscache_access_lookup_cookie_end, fscache_access_lookup_cookie_end_failed, @@ -149,6 +150,7 @@ enum fscache_access_trace { EM(fscache_access_io_not_live, "END io_notl") \ EM(fscache_access_io_read, "BEGIN io_read") \ EM(fscache_access_io_wait, "WAIT io ") \ + EM(fscache_access_io_write, "BEGIN io_writ") \ EM(fscache_access_lookup_cookie, "BEGIN lookup ") \ EM(fscache_access_lookup_cookie_end, "END lookup ") \ EM(fscache_access_lookup_cookie_end_failed,"END lookupf") \ -- cgit v1.2.3 From b6e16652d6c0e4f9e9b120f66966ec153f0623fc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 14:06:34 +0100 Subject: fscache: Implement higher-level write I/O interface Provide a higher-level function than fscache_write() to perform a write from an inode's pagecache to the cache, whilst fending off concurrent writes by means of the PG_fscache mark on a page: void fscache_write_to_cache(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, loff_t i_size, netfs_io_terminated_t term_func, void *term_func_priv, bool caching); If caching is false, this function does nothing except call (*term_func)() if given. It assumes that, in such a case, PG_fscache will not have been set on the pages. Otherwise, if caching is true, this function requires the source pages to have had PG_fscache set on them before calling. start and len define the region of the file to be modified and i_size indicates the new file size. The source pages are extracted from the mapping. term_func and term_func_priv work as for fscache_write(). The PG_fscache marks will be cleared at the end of the operation, before term_func is called or the function otherwise returns. There is an additonal helper function to clear the PG_fscache bits from a range of pages: void fscache_clear_page_bits(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, bool caching); If caching is true, the pages to be managed are expected to be located on mapping in the range defined by start and len. If caching is false, it does nothing. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819614155.215744.5528123235123721230.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906916346.143852.15632773570362489926.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967123599.1823006.12946816026724657428.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021522672.640689.4381958316198807813.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index ae753cae0fdd..9d469613e16c 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -165,6 +165,11 @@ extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int); extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); +extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *, + loff_t, size_t, loff_t, netfs_io_terminated_t, void *, + bool); +extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t); + /** * fscache_acquire_volume - Register a volume as desiring caching services * @volume_key: An identification string for the volume @@ -503,4 +508,62 @@ int fscache_write(struct netfs_cache_resources *cres, return ops->write(cres, start_pos, iter, term_func, term_func_priv); } +/** + * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages + * @cookie: The cookie representing the cache object + * @mapping: The netfs inode to use as the source + * @start: The start position in @mapping + * @len: The amount of data to unlock + * @caching: If PG_fscache has been set + * + * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's + * waiting. + */ +static inline void fscache_clear_page_bits(struct fscache_cookie *cookie, + struct address_space *mapping, + loff_t start, size_t len, + bool caching) +{ + if (caching) + __fscache_clear_page_bits(mapping, start, len); +} + +/** + * fscache_write_to_cache - Save a write to the cache and clear PG_fscache + * @cookie: The cookie representing the cache object + * @mapping: The netfs inode to use as the source + * @start: The start position in @mapping + * @len: The amount of data to write back + * @i_size: The new size of the inode + * @term_func: The function to call upon completion + * @term_func_priv: The private data for @term_func + * @caching: If PG_fscache has been set + * + * Helper function for a netfs to write dirty data from an inode into the cache + * object that's backing it. + * + * @start and @len describe the range of the data. This does not need to be + * page-aligned, but to satisfy DIO requirements, the cache may expand it up to + * the page boundaries on either end. All the pages covering the range must be + * marked with PG_fscache. + * + * If given, @term_func will be called upon completion and supplied with + * @term_func_priv. Note that the PG_fscache flags will have been cleared by + * this point, so the netfs must retain its own pin on the mapping. + */ +static inline void fscache_write_to_cache(struct fscache_cookie *cookie, + struct address_space *mapping, + loff_t start, size_t len, loff_t i_size, + netfs_io_terminated_t term_func, + void *term_func_priv, + bool caching) +{ + if (caching) + __fscache_write_to_cache(cookie, mapping, start, len, i_size, + term_func, term_func_priv, caching); + else if (term_func) + term_func(term_func_priv, -ENOBUFS, false); + +} + #endif /* _LINUX_FSCACHE_H */ -- cgit v1.2.3 From 08276bdae68b022a7726edf7416b6748e3df5395 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 23:50:01 +0100 Subject: vfs, fscache: Implement pinning of cache usage for writeback Cachefiles has a problem in that it needs to keep the backing file for a cookie open whilst there are local modifications pending that need to be written to it. However, we don't want to keep the file open indefinitely, as that causes EMFILE/ENFILE/ENOMEM problems. Reopening the cache file, however, is a problem if this is being done due to writeback triggered by exit(). Some filesystems will oops if we try to open a file in that context because they want to access current->fs or other resources that have already been dismantled. To get around this, I added the following: (1) An inode flag, I_PINNING_FSCACHE_WB, to be set on a network filesystem inode to indicate that we have a usage count on the cookie caching that inode. (2) A flag in struct writeback_control, unpinned_fscache_wb, that is set when __writeback_single_inode() clears the last dirty page from i_pages - at which point it clears I_PINNING_FSCACHE_WB and sets this flag. This has to be done here so that clearing I_PINNING_FSCACHE_WB can be done atomically with the check of PAGECACHE_TAG_DIRTY that clears I_DIRTY_PAGES. (3) A function, fscache_set_page_dirty(), which if it is not set, sets I_PINNING_FSCACHE_WB and calls fscache_use_cookie() to pin the cache resources. (4) A function, fscache_unpin_writeback(), to be called by ->write_inode() to unuse the cookie. (5) A function, fscache_clear_inode_writeback(), to be called when the inode is evicted, before clear_inode() is called. This cleans up any lingering I_PINNING_FSCACHE_WB. The network filesystem can then use these tools to make sure that fscache_write_to_cache() can write locally modified data to the cache as well as to the server. For the future, I'm working on write helpers for netfs lib that should allow this facility to be removed by keeping track of the dirty regions separately - but that's incomplete at the moment and is also going to be affected by folios, one way or another, since it deals with pages Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819615157.215744.17623791756928043114.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906917856.143852.8224898306177154573.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967124567.1823006.14188359004568060298.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021524705.640689.17824932021727663017.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fs.h | 3 +++ include/linux/fscache.h | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/writeback.h | 1 + 3 files changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bbf812ce89a8..2c0b8e77d9ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2418,6 +2418,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * Used to detect that mark_inode_dirty() should not move * inode between dirty lists. * + * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2440,6 +2442,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) +#define I_PINNING_FSCACHE_WB (1 << 18) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9d469613e16c..18e725671594 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -16,6 +16,7 @@ #include #include +#include #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) #define __fscache_available (1) @@ -566,4 +567,44 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, } +#if __fscache_available +extern int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie); +#else +#define fscache_set_page_dirty(PAGE, COOKIE) (__set_page_dirty_nobuffers((PAGE))) +#endif + +/** + * fscache_unpin_writeback - Unpin writeback resources + * @wbc: The writeback control + * @cookie: The cookie referring to the cache object + * + * Unpin the writeback resources pinned by fscache_set_page_dirty(). This is + * intended to be called by the netfs's ->write_inode() method. + */ +static inline void fscache_unpin_writeback(struct writeback_control *wbc, + struct fscache_cookie *cookie) +{ + if (wbc->unpinned_fscache_wb) + fscache_unuse_cookie(cookie, NULL, NULL); +} + +/** + * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode + * @cookie: The cookie referring to the cache object + * @inode: The inode to clean up + * @aux: Auxiliary data to apply to the inode + * + * Clear any writeback resources held by an inode when the inode is evicted. + * This must be called before clear_inode() is called. + */ +static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie, + struct inode *inode, + const void *aux) +{ + if (inode->i_state & I_PINNING_FSCACHE_WB) { + loff_t i_size = i_size_read(inode); + fscache_unuse_cookie(cookie, aux, &i_size); + } +} + #endif /* _LINUX_FSCACHE_H */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3bfd487d1dd2..fec248ab1fec 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -68,6 +68,7 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ + unsigned unpinned_fscache_wb:1; /* Cleared I_PINNING_FSCACHE_WB */ /* * When writeback IOs are bounced through async layers, only the -- cgit v1.2.3 From 1f67e6d0b18853c641d861a671f46a4964a88510 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 14:06:34 +0100 Subject: fscache: Provide a function to note the release of a page Provide a function to be called from a network filesystem's releasepage method to indicate that a page has been released that might have been a reflection of data upon the server - and now that data must be reloaded from the server or the cache. This is used to end an optimisation for empty files, in particular files that have just been created locally, whereby we know there cannot yet be any data that we would need to read from the server or the cache. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819617128.215744.4725572296135656508.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906920354.143852.7511819614661372008.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967128061.1823006.611781655060034988.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021525963.640689.9264556596205140044.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 18e725671594..28ce258c1f87 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -607,4 +607,20 @@ static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie, } } +/** + * fscache_note_page_release - Note that a netfs page got released + * @cookie: The cookie corresponding to the file + * + * Note that a page that has been copied to the cache has been released. This + * means that future reads will need to look in the cache to see if it's there. + */ +static inline +void fscache_note_page_release(struct fscache_cookie *cookie) +{ + if (cookie && + test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) && + test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) + clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); +} + #endif /* _LINUX_FSCACHE_H */ -- cgit v1.2.3 From 16a96bdf92d5af06f9fa6a01a4b08e2fdfed2e5b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 14:06:34 +0100 Subject: fscache: Provide a function to resize a cookie Provide a function to change the size of the storage attached to a cookie, to match the size of the file being cached when it's changed by truncate or fallocate: void fscache_resize_cookie(struct fscache_cookie *cookie, loff_t new_size); This acts synchronously and is expected to run under the inode lock of the caller. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819621839.215744.7895597119803515402.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906922387.143852.16394459879816147793.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967128998.1823006.10740669081985775576.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021527861.640689.3466382085497236267.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 4 ++++ include/linux/fscache.h | 18 ++++++++++++++++++ include/trace/events/fscache.h | 25 +++++++++++++++++++++++++ 3 files changed, 47 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 796c8b5c5305..3fa4902dc87c 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -64,6 +64,10 @@ struct fscache_cache_ops { /* Withdraw an object without any cookie access counts held */ void (*withdraw_cookie)(struct fscache_cookie *cookie); + /* Change the size of a data object */ + void (*resize_cookie)(struct netfs_cache_resources *cres, + loff_t new_size); + /* Invalidate an object */ bool (*invalidate_cookie)(struct fscache_cookie *cookie); diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 28ce258c1f87..86b1c0db1de5 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -163,6 +163,7 @@ extern struct fscache_cookie *__fscache_acquire_cookie( extern void __fscache_use_cookie(struct fscache_cookie *, bool); extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *); extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); +extern void __fscache_resize_cookie(struct fscache_cookie *, loff_t); extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int); extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); @@ -366,6 +367,23 @@ void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, __fscache_update_cookie(cookie, aux_data, object_size); } +/** + * fscache_resize_cookie - Request that a cache object be resized + * @cookie: The cookie representing the cache object + * @new_size: The new size of the object (may be NULL) + * + * Request that the size of an object be changed. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_resize_cookie(struct fscache_cookie *cookie, loff_t new_size) +{ + if (fscache_cookie_enabled(cookie)) + __fscache_resize_cookie(cookie, new_size); +} + /** * fscache_invalidate - Notify cache that an object needs invalidation * @cookie: The cookie representing the cache object diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 2459d75659cf..5fa37a8b4ec7 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -78,6 +78,7 @@ enum fscache_access_trace { fscache_access_invalidate_cookie_end, fscache_access_io_not_live, fscache_access_io_read, + fscache_access_io_resize, fscache_access_io_wait, fscache_access_io_write, fscache_access_lookup_cookie, @@ -149,6 +150,7 @@ enum fscache_access_trace { EM(fscache_access_invalidate_cookie_end,"END inval ") \ EM(fscache_access_io_not_live, "END io_notl") \ EM(fscache_access_io_read, "BEGIN io_read") \ + EM(fscache_access_io_resize, "BEGIN io_resz") \ EM(fscache_access_io_wait, "WAIT io ") \ EM(fscache_access_io_write, "BEGIN io_writ") \ EM(fscache_access_lookup_cookie, "BEGIN lookup ") \ @@ -418,6 +420,29 @@ TRACE_EVENT(fscache_invalidate, __entry->cookie, __entry->new_size) ); +TRACE_EVENT(fscache_resize, + TP_PROTO(struct fscache_cookie *cookie, loff_t new_size), + + TP_ARGS(cookie, new_size), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(loff_t, old_size ) + __field(loff_t, new_size ) + ), + + TP_fast_assign( + __entry->cookie = cookie->debug_id; + __entry->old_size = cookie->object_size; + __entry->new_size = new_size; + ), + + TP_printk("c=%08x os=%08llx sz=%08llx", + __entry->cookie, + __entry->old_size, + __entry->new_size) + ); + #endif /* _TRACE_FSCACHE_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 77443f6171f32626f24b2f97494c71a6bd83831a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Nov 2021 14:32:29 +0000 Subject: cachefiles: Introduce rewritten driver Introduce basic skeleton of the rewritten cachefiles driver including config options so that it can be enabled for compilation. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819622766.215744.9108359326983195047.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906923341.143852.3856498104256721447.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967130320.1823006.15791456613198441566.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021528993.640689.9069695476048171884.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/cachefiles.h | 49 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 include/trace/events/cachefiles.h (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h new file mode 100644 index 000000000000..5ee0aabb20be --- /dev/null +++ b/include/trace/events/cachefiles.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* CacheFiles tracepoints + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cachefiles + +#if !defined(_TRACE_CACHEFILES_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CACHEFILES_H + +#include + +/* + * Define enums for tracing information. + */ +#ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY + +#endif + +/* + * Define enum -> string mappings for display. + */ + + +/* + * Export enum symbols via userspace. + */ +#undef EM +#undef E_ +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define E_(a, b) TRACE_DEFINE_ENUM(a); + +/* + * Now redefine the EM() and E_() macros to map the enums to the strings that + * will be printed in the output. + */ +#undef EM +#undef E_ +#define EM(a, b) { a, b }, +#define E_(a, b) { a, b } + + +#endif /* _TRACE_CACHEFILES_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From ecf5a6ce15f90d1fe6bc326c720d21fc0e73fc88 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 21 Oct 2021 08:42:18 +0100 Subject: cachefiles: Add a couple of tracepoints for logging errors Add two trace points to log errors, one for vfs operations like mkdir or create, and one for I/O operations, like read, write or truncate. Also add the beginnings of a struct that is going to represent a data file and place a debugging ID in it for the tracepoints to record. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819625632.215744.17907340966178411033.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906926297.143852.18267924605548658911.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967135390.1823006.2512120406360156424.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021534029.640689.1875723624947577095.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/cachefiles.h | 94 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 5ee0aabb20be..9bd5a8a60801 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -18,11 +18,49 @@ #ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY +enum cachefiles_error_trace { + cachefiles_trace_fallocate_error, + cachefiles_trace_getxattr_error, + cachefiles_trace_link_error, + cachefiles_trace_lookup_error, + cachefiles_trace_mkdir_error, + cachefiles_trace_notify_change_error, + cachefiles_trace_open_error, + cachefiles_trace_read_error, + cachefiles_trace_remxattr_error, + cachefiles_trace_rename_error, + cachefiles_trace_seek_error, + cachefiles_trace_setxattr_error, + cachefiles_trace_statfs_error, + cachefiles_trace_tmpfile_error, + cachefiles_trace_trunc_error, + cachefiles_trace_unlink_error, + cachefiles_trace_write_error, +}; + #endif /* * Define enum -> string mappings for display. */ +#define cachefiles_error_traces \ + EM(cachefiles_trace_fallocate_error, "fallocate") \ + EM(cachefiles_trace_getxattr_error, "getxattr") \ + EM(cachefiles_trace_link_error, "link") \ + EM(cachefiles_trace_lookup_error, "lookup") \ + EM(cachefiles_trace_mkdir_error, "mkdir") \ + EM(cachefiles_trace_notify_change_error, "notify_change") \ + EM(cachefiles_trace_open_error, "open") \ + EM(cachefiles_trace_read_error, "read") \ + EM(cachefiles_trace_remxattr_error, "remxattr") \ + EM(cachefiles_trace_rename_error, "rename") \ + EM(cachefiles_trace_seek_error, "seek") \ + EM(cachefiles_trace_setxattr_error, "setxattr") \ + EM(cachefiles_trace_statfs_error, "statfs") \ + EM(cachefiles_trace_tmpfile_error, "tmpfile") \ + EM(cachefiles_trace_trunc_error, "trunc") \ + EM(cachefiles_trace_unlink_error, "unlink") \ + E_(cachefiles_trace_write_error, "write") /* @@ -33,6 +71,8 @@ #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +cachefiles_error_traces; + /* * Now redefine the EM() and E_() macros to map the enums to the strings that * will be printed in the output. @@ -43,6 +83,60 @@ #define E_(a, b) { a, b } +TRACE_EVENT(cachefiles_vfs_error, + TP_PROTO(struct cachefiles_object *obj, struct inode *backer, + int error, enum cachefiles_error_trace where), + + TP_ARGS(obj, backer, error, where), + + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(unsigned int, backer ) + __field(enum cachefiles_error_trace, where ) + __field(short, error ) + ), + + TP_fast_assign( + __entry->obj = obj ? obj->debug_id : 0; + __entry->backer = backer->i_ino; + __entry->error = error; + __entry->where = where; + ), + + TP_printk("o=%08x b=%08x %s e=%d", + __entry->obj, + __entry->backer, + __print_symbolic(__entry->where, cachefiles_error_traces), + __entry->error) + ); + +TRACE_EVENT(cachefiles_io_error, + TP_PROTO(struct cachefiles_object *obj, struct inode *backer, + int error, enum cachefiles_error_trace where), + + TP_ARGS(obj, backer, error, where), + + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(unsigned int, backer ) + __field(enum cachefiles_error_trace, where ) + __field(short, error ) + ), + + TP_fast_assign( + __entry->obj = obj ? obj->debug_id : 0; + __entry->backer = backer->i_ino; + __entry->error = error; + __entry->where = where; + ), + + TP_printk("o=%08x b=%08x %s e=%d", + __entry->obj, + __entry->backer, + __print_symbolic(__entry->where, cachefiles_error_traces), + __entry->error) + ); + #endif /* _TRACE_CACHEFILES_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 1bd9c4e4f0494915b2391f373d25096579f835ff Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 18 Nov 2021 08:58:08 +0000 Subject: vfs, cachefiles: Mark a backing file in use with an inode flag Use an inode flag, S_KERNEL_FILE, to mark that a backing file is in use by the kernel to prevent cachefiles or other kernel services from interfering with that file. Alter rmdir to reject attempts to remove a directory marked with this flag. This is used by cachefiles to prevent cachefilesd from removing them. Using S_SWAPFILE instead isn't really viable as that has other effects in the I/O paths. Changes ======= ver #3: - Check for the object pointer being NULL in the tracepoints rather than the caller. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819630256.215744.4815885535039369574.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906931596.143852.8642051223094013028.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967141000.1823006.12920680657559677789.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021541207.640689.564689725898537127.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fs.h | 1 + include/trace/events/cachefiles.h | 42 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c0b8e77d9ab..bcf1ca430139 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2249,6 +2249,7 @@ struct super_operations { #define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */ #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ +#define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 9bd5a8a60801..6331cd29880d 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -83,6 +83,48 @@ cachefiles_error_traces; #define E_(a, b) { a, b } +TRACE_EVENT(cachefiles_mark_active, + TP_PROTO(struct cachefiles_object *obj, + struct inode *inode), + + TP_ARGS(obj, inode), + + /* Note that obj may be NULL */ + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(ino_t, inode ) + ), + + TP_fast_assign( + __entry->obj = obj ? obj->debug_id : 0; + __entry->inode = inode->i_ino; + ), + + TP_printk("o=%08x i=%lx", + __entry->obj, __entry->inode) + ); + +TRACE_EVENT(cachefiles_mark_inactive, + TP_PROTO(struct cachefiles_object *obj, + struct inode *inode), + + TP_ARGS(obj, inode), + + /* Note that obj may be NULL */ + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(ino_t, inode ) + ), + + TP_fast_assign( + __entry->obj = obj ? obj->debug_id : 0; + __entry->inode = inode->i_ino; + ), + + TP_printk("o=%08x i=%lx", + __entry->obj, __entry->inode) + ); + TRACE_EVENT(cachefiles_vfs_error, TP_PROTO(struct cachefiles_object *obj, struct inode *backer, int error, enum cachefiles_error_trace where), -- cgit v1.2.3 From 13871bad1ef7f41947c816a9e342aa9fa8643c5e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 16 Nov 2021 16:30:25 +0000 Subject: cachefiles: Add tracepoints for calls to the VFS Add tracepoints in cachefiles to monitor when it does various VFS operations, such as mkdir. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819638517.215744.12773133137536579766.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906938316.143852.17227990869551737803.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967147139.1823006.4909879317496543392.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021546287.640689.3501604495002415631.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/cachefiles.h | 176 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 6331cd29880d..5975ea4977b2 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -18,6 +18,21 @@ #ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY +enum fscache_why_object_killed { + FSCACHE_OBJECT_IS_STALE, + FSCACHE_OBJECT_IS_WEIRD, + FSCACHE_OBJECT_INVALIDATED, + FSCACHE_OBJECT_NO_SPACE, + FSCACHE_OBJECT_WAS_RETIRED, + FSCACHE_OBJECT_WAS_CULLED, +}; + +enum cachefiles_trunc_trace { + cachefiles_trunc_dio_adjust, + cachefiles_trunc_expand_tmpfile, + cachefiles_trunc_shrink, +}; + enum cachefiles_error_trace { cachefiles_trace_fallocate_error, cachefiles_trace_getxattr_error, @@ -43,6 +58,19 @@ enum cachefiles_error_trace { /* * Define enum -> string mappings for display. */ +#define cachefiles_obj_kill_traces \ + EM(FSCACHE_OBJECT_IS_STALE, "stale") \ + EM(FSCACHE_OBJECT_IS_WEIRD, "weird") \ + EM(FSCACHE_OBJECT_INVALIDATED, "inval") \ + EM(FSCACHE_OBJECT_NO_SPACE, "no_space") \ + EM(FSCACHE_OBJECT_WAS_RETIRED, "was_retired") \ + E_(FSCACHE_OBJECT_WAS_CULLED, "was_culled") + +#define cachefiles_trunc_traces \ + EM(cachefiles_trunc_dio_adjust, "DIOADJ") \ + EM(cachefiles_trunc_expand_tmpfile, "EXPTMP") \ + E_(cachefiles_trunc_shrink, "SHRINK") + #define cachefiles_error_traces \ EM(cachefiles_trace_fallocate_error, "fallocate") \ EM(cachefiles_trace_getxattr_error, "getxattr") \ @@ -71,6 +99,8 @@ enum cachefiles_error_trace { #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +cachefiles_obj_kill_traces; +cachefiles_trunc_traces; cachefiles_error_traces; /* @@ -83,6 +113,152 @@ cachefiles_error_traces; #define E_(a, b) { a, b } +TRACE_EVENT(cachefiles_lookup, + TP_PROTO(struct cachefiles_object *obj, + struct dentry *de), + + TP_ARGS(obj, de), + + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(short, error ) + __field(unsigned long, ino ) + ), + + TP_fast_assign( + __entry->obj = obj->debug_id; + __entry->ino = (!IS_ERR(de) && d_backing_inode(de) ? + d_backing_inode(de)->i_ino : 0); + __entry->error = IS_ERR(de) ? PTR_ERR(de) : 0; + ), + + TP_printk("o=%08x i=%lx e=%d", + __entry->obj, __entry->ino, __entry->error) + ); + +TRACE_EVENT(cachefiles_tmpfile, + TP_PROTO(struct cachefiles_object *obj, struct inode *backer), + + TP_ARGS(obj, backer), + + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(unsigned int, backer ) + ), + + TP_fast_assign( + __entry->obj = obj->debug_id; + __entry->backer = backer->i_ino; + ), + + TP_printk("o=%08x b=%08x", + __entry->obj, + __entry->backer) + ); + +TRACE_EVENT(cachefiles_link, + TP_PROTO(struct cachefiles_object *obj, struct inode *backer), + + TP_ARGS(obj, backer), + + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(unsigned int, backer ) + ), + + TP_fast_assign( + __entry->obj = obj->debug_id; + __entry->backer = backer->i_ino; + ), + + TP_printk("o=%08x b=%08x", + __entry->obj, + __entry->backer) + ); + +TRACE_EVENT(cachefiles_unlink, + TP_PROTO(struct cachefiles_object *obj, + struct dentry *de, + enum fscache_why_object_killed why), + + TP_ARGS(obj, de, why), + + /* Note that obj may be NULL */ + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(struct dentry *, de ) + __field(enum fscache_why_object_killed, why ) + ), + + TP_fast_assign( + __entry->obj = obj ? obj->debug_id : UINT_MAX; + __entry->de = de; + __entry->why = why; + ), + + TP_printk("o=%08x d=%p w=%s", + __entry->obj, __entry->de, + __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) + ); + +TRACE_EVENT(cachefiles_rename, + TP_PROTO(struct cachefiles_object *obj, + struct dentry *de, + struct dentry *to, + enum fscache_why_object_killed why), + + TP_ARGS(obj, de, to, why), + + /* Note that obj may be NULL */ + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(struct dentry *, de ) + __field(struct dentry *, to ) + __field(enum fscache_why_object_killed, why ) + ), + + TP_fast_assign( + __entry->obj = obj ? obj->debug_id : UINT_MAX; + __entry->de = de; + __entry->to = to; + __entry->why = why; + ), + + TP_printk("o=%08x d=%p t=%p w=%s", + __entry->obj, __entry->de, __entry->to, + __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) + ); + +TRACE_EVENT(cachefiles_trunc, + TP_PROTO(struct cachefiles_object *obj, struct inode *backer, + loff_t from, loff_t to, enum cachefiles_trunc_trace why), + + TP_ARGS(obj, backer, from, to, why), + + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(unsigned int, backer ) + __field(enum cachefiles_trunc_trace, why ) + __field(loff_t, from ) + __field(loff_t, to ) + ), + + TP_fast_assign( + __entry->obj = obj->debug_id; + __entry->backer = backer->i_ino; + __entry->from = from; + __entry->to = to; + __entry->why = why; + ), + + TP_printk("o=%08x b=%08x %s l=%llx->%llx", + __entry->obj, + __entry->backer, + __print_symbolic(__entry->why, cachefiles_trunc_traces), + __entry->from, + __entry->to) + ); + TRACE_EVENT(cachefiles_mark_active, TP_PROTO(struct cachefiles_object *obj, struct inode *inode), -- cgit v1.2.3 From df98e87f2091774c377ddfaedfe64bd90ed4bdca Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Nov 2021 16:22:21 +0000 Subject: cachefiles: Implement object lifecycle funcs Implement allocate, get, see and put functions for the cachefiles_object struct. The members of the struct we're going to need are also added. Additionally, implement a lifecycle tracepoint. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819639457.215744.4600093239395728232.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906939569.143852.3594314410666551982.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967148857.1823006.6332962598220464364.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021547762.640689.8422781599594931000.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/cachefiles.h | 58 +++++++++++++++++++++++++++++++++++++++ include/trace/events/fscache.h | 4 +++ 2 files changed, 62 insertions(+) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 5975ea4977b2..54815cc776ba 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -18,6 +18,21 @@ #ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY +enum cachefiles_obj_ref_trace { + cachefiles_obj_get_ioreq, + cachefiles_obj_new, + cachefiles_obj_put_alloc_fail, + cachefiles_obj_put_detach, + cachefiles_obj_put_ioreq, + cachefiles_obj_see_clean_commit, + cachefiles_obj_see_clean_delete, + cachefiles_obj_see_clean_drop_tmp, + cachefiles_obj_see_lookup_cookie, + cachefiles_obj_see_lookup_failed, + cachefiles_obj_see_withdraw_cookie, + cachefiles_obj_see_withdrawal, +}; + enum fscache_why_object_killed { FSCACHE_OBJECT_IS_STALE, FSCACHE_OBJECT_IS_WEIRD, @@ -66,6 +81,20 @@ enum cachefiles_error_trace { EM(FSCACHE_OBJECT_WAS_RETIRED, "was_retired") \ E_(FSCACHE_OBJECT_WAS_CULLED, "was_culled") +#define cachefiles_obj_ref_traces \ + EM(cachefiles_obj_get_ioreq, "GET ioreq") \ + EM(cachefiles_obj_new, "NEW obj") \ + EM(cachefiles_obj_put_alloc_fail, "PUT alloc_fail") \ + EM(cachefiles_obj_put_detach, "PUT detach") \ + EM(cachefiles_obj_put_ioreq, "PUT ioreq") \ + EM(cachefiles_obj_see_clean_commit, "SEE clean_commit") \ + EM(cachefiles_obj_see_clean_delete, "SEE clean_delete") \ + EM(cachefiles_obj_see_clean_drop_tmp, "SEE clean_drop_tmp") \ + EM(cachefiles_obj_see_lookup_cookie, "SEE lookup_cookie") \ + EM(cachefiles_obj_see_lookup_failed, "SEE lookup_failed") \ + EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ + E_(cachefiles_obj_see_withdrawal, "SEE withdrawal") + #define cachefiles_trunc_traces \ EM(cachefiles_trunc_dio_adjust, "DIOADJ") \ EM(cachefiles_trunc_expand_tmpfile, "EXPTMP") \ @@ -100,6 +129,7 @@ enum cachefiles_error_trace { #define E_(a, b) TRACE_DEFINE_ENUM(a); cachefiles_obj_kill_traces; +cachefiles_obj_ref_traces; cachefiles_trunc_traces; cachefiles_error_traces; @@ -113,6 +143,34 @@ cachefiles_error_traces; #define E_(a, b) { a, b } +TRACE_EVENT(cachefiles_ref, + TP_PROTO(unsigned int object_debug_id, + unsigned int cookie_debug_id, + int usage, + enum cachefiles_obj_ref_trace why), + + TP_ARGS(object_debug_id, cookie_debug_id, usage, why), + + /* Note that obj may be NULL */ + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(unsigned int, cookie ) + __field(enum cachefiles_obj_ref_trace, why ) + __field(int, usage ) + ), + + TP_fast_assign( + __entry->obj = object_debug_id; + __entry->cookie = cookie_debug_id; + __entry->usage = usage; + __entry->why = why; + ), + + TP_printk("c=%08x o=%08x u=%d %s", + __entry->cookie, __entry->obj, __entry->usage, + __print_symbolic(__entry->why, cachefiles_obj_ref_traces)) + ); + TRACE_EVENT(cachefiles_lookup, TP_PROTO(struct cachefiles_object *obj, struct dentry *de), diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 5fa37a8b4ec7..d9d830296ec3 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -49,6 +49,7 @@ enum fscache_volume_trace { enum fscache_cookie_trace { fscache_cookie_collision, fscache_cookie_discard, + fscache_cookie_get_attach_object, fscache_cookie_get_end_access, fscache_cookie_get_hash_collision, fscache_cookie_get_inval_work, @@ -57,6 +58,7 @@ enum fscache_cookie_trace { fscache_cookie_new_acquire, fscache_cookie_put_hash_collision, fscache_cookie_put_lru, + fscache_cookie_put_object, fscache_cookie_put_over_queued, fscache_cookie_put_relinquish, fscache_cookie_put_withdrawn, @@ -122,6 +124,7 @@ enum fscache_access_trace { #define fscache_cookie_traces \ EM(fscache_cookie_collision, "*COLLIDE*") \ EM(fscache_cookie_discard, "DISCARD ") \ + EM(fscache_cookie_get_attach_object, "GET attch") \ EM(fscache_cookie_get_hash_collision, "GET hcoll") \ EM(fscache_cookie_get_end_access, "GQ endac") \ EM(fscache_cookie_get_inval_work, "GQ inval") \ @@ -130,6 +133,7 @@ enum fscache_access_trace { EM(fscache_cookie_new_acquire, "NEW acq ") \ EM(fscache_cookie_put_hash_collision, "PUT hcoll") \ EM(fscache_cookie_put_lru, "PUT lru ") \ + EM(fscache_cookie_put_object, "PUT obj ") \ EM(fscache_cookie_put_over_queued, "PQ overq") \ EM(fscache_cookie_put_relinquish, "PUT relnq") \ EM(fscache_cookie_put_withdrawn, "PUT wthdn") \ -- cgit v1.2.3 From 72b957856b0c09eee542afcff29705dd0adda654 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Nov 2021 16:11:07 +0000 Subject: cachefiles: Implement metadata/coherency data storage in xattrs Use an xattr on each backing file in the cache to store some metadata, such as the content type and the coherency data. Five content types are defined: (0) No content stored. (1) The file contains a single monolithic blob and must be all or nothing. This would be used for something like an AFS directory or a symlink. (2) The file is populated with content completely up to a point with nothing beyond that. (3) The file has a map attached and is sparsely populated. This would be stored in one or more additional xattrs. (4) The file is dirty, being in the process of local modification and the contents are not necessarily represented correctly by the metadata. The file should be deleted if this is seen on binding. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819641320.215744.16346770087799536862.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906942248.143852.5423738045012094252.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967151734.1823006.9301249989443622576.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021550471.640689.553853918307994335.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/cachefiles.h | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 54815cc776ba..98b1eee4a7a8 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -42,6 +42,19 @@ enum fscache_why_object_killed { FSCACHE_OBJECT_WAS_CULLED, }; +enum cachefiles_coherency_trace { + cachefiles_coherency_check_aux, + cachefiles_coherency_check_content, + cachefiles_coherency_check_dirty, + cachefiles_coherency_check_len, + cachefiles_coherency_check_objsize, + cachefiles_coherency_check_ok, + cachefiles_coherency_check_type, + cachefiles_coherency_check_xattr, + cachefiles_coherency_set_fail, + cachefiles_coherency_set_ok, +}; + enum cachefiles_trunc_trace { cachefiles_trunc_dio_adjust, cachefiles_trunc_expand_tmpfile, @@ -95,6 +108,18 @@ enum cachefiles_error_trace { EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ E_(cachefiles_obj_see_withdrawal, "SEE withdrawal") +#define cachefiles_coherency_traces \ + EM(cachefiles_coherency_check_aux, "BAD aux ") \ + EM(cachefiles_coherency_check_content, "BAD cont") \ + EM(cachefiles_coherency_check_dirty, "BAD dirt") \ + EM(cachefiles_coherency_check_len, "BAD len ") \ + EM(cachefiles_coherency_check_objsize, "BAD osiz") \ + EM(cachefiles_coherency_check_ok, "OK ") \ + EM(cachefiles_coherency_check_type, "BAD type") \ + EM(cachefiles_coherency_check_xattr, "BAD xatt") \ + EM(cachefiles_coherency_set_fail, "SET fail") \ + E_(cachefiles_coherency_set_ok, "SET ok ") + #define cachefiles_trunc_traces \ EM(cachefiles_trunc_dio_adjust, "DIOADJ") \ EM(cachefiles_trunc_expand_tmpfile, "EXPTMP") \ @@ -130,6 +155,7 @@ enum cachefiles_error_trace { cachefiles_obj_kill_traces; cachefiles_obj_ref_traces; +cachefiles_coherency_traces; cachefiles_trunc_traces; cachefiles_error_traces; @@ -287,6 +313,36 @@ TRACE_EVENT(cachefiles_rename, __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); +TRACE_EVENT(cachefiles_coherency, + TP_PROTO(struct cachefiles_object *obj, + ino_t ino, + enum cachefiles_content content, + enum cachefiles_coherency_trace why), + + TP_ARGS(obj, ino, content, why), + + /* Note that obj may be NULL */ + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(enum cachefiles_coherency_trace, why ) + __field(enum cachefiles_content, content ) + __field(u64, ino ) + ), + + TP_fast_assign( + __entry->obj = obj->debug_id; + __entry->why = why; + __entry->content = content; + __entry->ino = ino; + ), + + TP_printk("o=%08x %s i=%llx c=%u", + __entry->obj, + __print_symbolic(__entry->why, cachefiles_coherency_traces), + __entry->ino, + __entry->content) + ); + TRACE_EVENT(cachefiles_trunc, TP_PROTO(struct cachefiles_object *obj, struct inode *backer, loff_t from, loff_t to, enum cachefiles_trunc_trace why), -- cgit v1.2.3 From 287fd611238dd4b7e32fd3a8985aa387d26c4f29 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 21 Oct 2021 11:05:53 +0100 Subject: cachefiles: Implement begin and end I/O operation Implement the methods for beginning and ending an I/O operation. When called to begin an I/O operation, we are guaranteed that the cookie has reached a certain stage (we're called by fscache after it has done a suitable wait). If a file is available, we paste a ref over into the cache resources for the I/O routines to use. This means that the object can be invalidated whilst the I/O is ongoing without the need to synchronise as the file pointer in the object is replaced, but the file pointer in the cache resources is unaffected. Ending the operation just requires ditching any refs we have and dropping the access guarantee that fscache got for us on the cookie. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819645033.215744.2199344081658268312.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906951916.143852.9531384743995679857.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967161222.1823006.4461476204800357263.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021559030.640689.3684291785218094142.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/fscache.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index d9d830296ec3..1594aefadeac 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -78,6 +78,7 @@ enum fscache_access_trace { fscache_access_cache_unpin, fscache_access_invalidate_cookie, fscache_access_invalidate_cookie_end, + fscache_access_io_end, fscache_access_io_not_live, fscache_access_io_read, fscache_access_io_resize, @@ -152,6 +153,7 @@ enum fscache_access_trace { EM(fscache_access_cache_unpin, "UNPIN cache ") \ EM(fscache_access_invalidate_cookie, "BEGIN inval ") \ EM(fscache_access_invalidate_cookie_end,"END inval ") \ + EM(fscache_access_io_end, "END io ") \ EM(fscache_access_io_not_live, "END io_notl") \ EM(fscache_access_io_read, "BEGIN io_read") \ EM(fscache_access_io_resize, "BEGIN io_resz") \ -- cgit v1.2.3 From 047487c947e8b96b94579c3a33207bd4e266b4c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 21 Oct 2021 00:19:46 +0100 Subject: cachefiles: Implement the I/O routines Implement the I/O routines for cachefiles. There are two sets of routines here: preparation and actual I/O. Preparation for read involves looking to see whether there is data present, and how much. Netfslib tells us what it wants us to do and we have the option of adjusting shrinking and telling it whether to read from the cache, download from the server or simply clear a region. Preparation for write involves checking for space and defending against possibly running short of space, if necessary punching out a hole in the file so that we don't leave old data in the cache if we update the coherency information. Then there's a read routine and a write routine. They wait for the cookie state to move to something appropriate and then start a potentially asynchronous direct I/O operation upon it. Changes ======= ver #2: - Fix a misassigned variable[1]. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/YaZOCk9zxApPattb@archlinux-ax161/ [1] Link: https://lore.kernel.org/r/163819647945.215744.17827962047487125939.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906954666.143852.1504887120569779407.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967163110.1823006.9206718511874339672.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021562168.640689.8802250542405732391.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/cachefiles.h | 121 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 98b1eee4a7a8..ab1376ebc3ab 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -61,6 +61,17 @@ enum cachefiles_trunc_trace { cachefiles_trunc_shrink, }; +enum cachefiles_prepare_read_trace { + cachefiles_trace_read_after_eof, + cachefiles_trace_read_found_hole, + cachefiles_trace_read_found_part, + cachefiles_trace_read_have_data, + cachefiles_trace_read_no_data, + cachefiles_trace_read_no_file, + cachefiles_trace_read_seek_error, + cachefiles_trace_read_seek_nxio, +}; + enum cachefiles_error_trace { cachefiles_trace_fallocate_error, cachefiles_trace_getxattr_error, @@ -125,6 +136,16 @@ enum cachefiles_error_trace { EM(cachefiles_trunc_expand_tmpfile, "EXPTMP") \ E_(cachefiles_trunc_shrink, "SHRINK") +#define cachefiles_prepare_read_traces \ + EM(cachefiles_trace_read_after_eof, "after-eof ") \ + EM(cachefiles_trace_read_found_hole, "found-hole") \ + EM(cachefiles_trace_read_found_part, "found-part") \ + EM(cachefiles_trace_read_have_data, "have-data ") \ + EM(cachefiles_trace_read_no_data, "no-data ") \ + EM(cachefiles_trace_read_no_file, "no-file ") \ + EM(cachefiles_trace_read_seek_error, "seek-error") \ + E_(cachefiles_trace_read_seek_nxio, "seek-enxio") + #define cachefiles_error_traces \ EM(cachefiles_trace_fallocate_error, "fallocate") \ EM(cachefiles_trace_getxattr_error, "getxattr") \ @@ -157,6 +178,7 @@ cachefiles_obj_kill_traces; cachefiles_obj_ref_traces; cachefiles_coherency_traces; cachefiles_trunc_traces; +cachefiles_prepare_read_traces; cachefiles_error_traces; /* @@ -343,6 +365,105 @@ TRACE_EVENT(cachefiles_coherency, __entry->content) ); +TRACE_EVENT(cachefiles_prep_read, + TP_PROTO(struct netfs_read_subrequest *sreq, + enum netfs_read_source source, + enum cachefiles_prepare_read_trace why, + ino_t cache_inode), + + TP_ARGS(sreq, source, why, cache_inode), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + __field(unsigned short, index ) + __field(unsigned short, flags ) + __field(enum netfs_read_source, source ) + __field(enum cachefiles_prepare_read_trace, why ) + __field(size_t, len ) + __field(loff_t, start ) + __field(unsigned int, netfs_inode ) + __field(unsigned int, cache_inode ) + ), + + TP_fast_assign( + __entry->rreq = sreq->rreq->debug_id; + __entry->index = sreq->debug_index; + __entry->flags = sreq->flags; + __entry->source = source; + __entry->why = why; + __entry->len = sreq->len; + __entry->start = sreq->start; + __entry->netfs_inode = sreq->rreq->inode->i_ino; + __entry->cache_inode = cache_inode; + ), + + TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x b=%x", + __entry->rreq, __entry->index, + __print_symbolic(__entry->source, netfs_sreq_sources), + __print_symbolic(__entry->why, cachefiles_prepare_read_traces), + __entry->flags, + __entry->start, __entry->len, + __entry->netfs_inode, __entry->cache_inode) + ); + +TRACE_EVENT(cachefiles_read, + TP_PROTO(struct cachefiles_object *obj, + struct inode *backer, + loff_t start, + size_t len), + + TP_ARGS(obj, backer, start, len), + + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(unsigned int, backer ) + __field(size_t, len ) + __field(loff_t, start ) + ), + + TP_fast_assign( + __entry->obj = obj->debug_id; + __entry->backer = backer->i_ino; + __entry->start = start; + __entry->len = len; + ), + + TP_printk("o=%08x b=%08x s=%llx l=%zx", + __entry->obj, + __entry->backer, + __entry->start, + __entry->len) + ); + +TRACE_EVENT(cachefiles_write, + TP_PROTO(struct cachefiles_object *obj, + struct inode *backer, + loff_t start, + size_t len), + + TP_ARGS(obj, backer, start, len), + + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(unsigned int, backer ) + __field(size_t, len ) + __field(loff_t, start ) + ), + + TP_fast_assign( + __entry->obj = obj->debug_id; + __entry->backer = backer->i_ino; + __entry->start = start; + __entry->len = len; + ), + + TP_printk("o=%08x b=%08x s=%llx l=%zx", + __entry->obj, + __entry->backer, + __entry->start, + __entry->len) + ); + TRACE_EVENT(cachefiles_trunc, TP_PROTO(struct cachefiles_object *obj, struct inode *backer, loff_t from, loff_t to, enum cachefiles_trunc_trace why), -- cgit v1.2.3 From 32e150037dce368d129996ffe5f98217b1974d9e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Dec 2021 09:51:43 +0000 Subject: fscache, cachefiles: Store the volume coherency data Store the volume coherency data in an xattr and check it when we rebind the volume. If it doesn't match the cache volume is moved to the graveyard and rebuilt anew. Changes ======= ver #4: - Remove a couple of debugging prints. Signed-off-by: David Howells Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/163967164397.1823006.2950539849831291830.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021563138.640689.15851092065380543119.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache.h | 2 ++ include/trace/events/cachefiles.h | 42 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 86b1c0db1de5..7bd35f60d19a 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -87,6 +87,8 @@ struct fscache_volume { #define FSCACHE_VOLUME_COLLIDED_WITH 2 /* Volume was collided with */ #define FSCACHE_VOLUME_ACQUIRE_PENDING 3 /* Volume is waiting to complete acquisition */ #define FSCACHE_VOLUME_CREATING 4 /* Volume is being created on disk */ + u8 coherency_len; /* Length of the coherency data */ + u8 coherency[]; /* Coherency data */ }; /* diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index ab1376ebc3ab..1172529b5b49 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -40,6 +40,7 @@ enum fscache_why_object_killed { FSCACHE_OBJECT_NO_SPACE, FSCACHE_OBJECT_WAS_RETIRED, FSCACHE_OBJECT_WAS_CULLED, + FSCACHE_VOLUME_IS_WEIRD, }; enum cachefiles_coherency_trace { @@ -53,6 +54,11 @@ enum cachefiles_coherency_trace { cachefiles_coherency_check_xattr, cachefiles_coherency_set_fail, cachefiles_coherency_set_ok, + cachefiles_coherency_vol_check_cmp, + cachefiles_coherency_vol_check_ok, + cachefiles_coherency_vol_check_xattr, + cachefiles_coherency_vol_set_fail, + cachefiles_coherency_vol_set_ok, }; enum cachefiles_trunc_trace { @@ -103,7 +109,8 @@ enum cachefiles_error_trace { EM(FSCACHE_OBJECT_INVALIDATED, "inval") \ EM(FSCACHE_OBJECT_NO_SPACE, "no_space") \ EM(FSCACHE_OBJECT_WAS_RETIRED, "was_retired") \ - E_(FSCACHE_OBJECT_WAS_CULLED, "was_culled") + EM(FSCACHE_OBJECT_WAS_CULLED, "was_culled") \ + E_(FSCACHE_VOLUME_IS_WEIRD, "volume_weird") #define cachefiles_obj_ref_traces \ EM(cachefiles_obj_get_ioreq, "GET ioreq") \ @@ -129,7 +136,12 @@ enum cachefiles_error_trace { EM(cachefiles_coherency_check_type, "BAD type") \ EM(cachefiles_coherency_check_xattr, "BAD xatt") \ EM(cachefiles_coherency_set_fail, "SET fail") \ - E_(cachefiles_coherency_set_ok, "SET ok ") + EM(cachefiles_coherency_set_ok, "SET ok ") \ + EM(cachefiles_coherency_vol_check_cmp, "VOL BAD cmp ") \ + EM(cachefiles_coherency_vol_check_ok, "VOL OK ") \ + EM(cachefiles_coherency_vol_check_xattr,"VOL BAD xatt") \ + EM(cachefiles_coherency_vol_set_fail, "VOL SET fail") \ + E_(cachefiles_coherency_vol_set_ok, "VOL SET ok ") #define cachefiles_trunc_traces \ EM(cachefiles_trunc_dio_adjust, "DIOADJ") \ @@ -365,6 +377,32 @@ TRACE_EVENT(cachefiles_coherency, __entry->content) ); +TRACE_EVENT(cachefiles_vol_coherency, + TP_PROTO(struct cachefiles_volume *volume, + ino_t ino, + enum cachefiles_coherency_trace why), + + TP_ARGS(volume, ino, why), + + /* Note that obj may be NULL */ + TP_STRUCT__entry( + __field(unsigned int, vol ) + __field(enum cachefiles_coherency_trace, why ) + __field(u64, ino ) + ), + + TP_fast_assign( + __entry->vol = volume->vcookie->debug_id; + __entry->why = why; + __entry->ino = ino; + ), + + TP_printk("V=%08x %s i=%llx", + __entry->vol, + __print_symbolic(__entry->why, cachefiles_coherency_traces), + __entry->ino) + ); + TRACE_EVENT(cachefiles_prep_read, TP_PROTO(struct netfs_read_subrequest *sreq, enum netfs_read_source source, -- cgit v1.2.3 From 3929eca769b5a231010b4978acc61c0735da198f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 21 Oct 2021 21:58:29 +0100 Subject: fscache, cachefiles: Display stats of no-space events Add stat counters of no-space events that caused caching not to happen and display in /proc/fs/fscache/stats. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819653216.215744.17210522251617386509.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906958369.143852.7257100711818401748.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967166917.1823006.14842444049198947892.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021566184.640689.4417328329632709265.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 3fa4902dc87c..007e47f38610 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -186,11 +186,17 @@ static inline void fscache_wait_for_objects(struct fscache_cache *cache) #ifdef CONFIG_FSCACHE_STATS extern atomic_t fscache_n_read; extern atomic_t fscache_n_write; +extern atomic_t fscache_n_no_write_space; +extern atomic_t fscache_n_no_create_space; #define fscache_count_read() atomic_inc(&fscache_n_read) #define fscache_count_write() atomic_inc(&fscache_n_write) +#define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space) +#define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space) #else #define fscache_count_read() do {} while(0) #define fscache_count_write() do {} while(0) +#define fscache_count_no_write_space() do {} while(0) +#define fscache_count_no_create_space() do {} while(0) #endif #endif /* _LINUX_FSCACHE_CACHE_H */ -- cgit v1.2.3 From 9f08ebc3438baaaefcc79654b330209b83397f17 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 22 Oct 2021 09:17:58 +0100 Subject: fscache, cachefiles: Display stat of culling events Add a stat counter of culling events whereby the cache backend culls a file to make space (when asked by cachefilesd in this case) and display in /proc/fs/fscache/stats. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819654165.215744.3797804661644212436.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906961387.143852.9291157239960289090.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967168266.1823006.14436200166581605746.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021567619.640689.4339228906248763197.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache-cache.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 007e47f38610..a174cedf4d90 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -188,15 +188,18 @@ extern atomic_t fscache_n_read; extern atomic_t fscache_n_write; extern atomic_t fscache_n_no_write_space; extern atomic_t fscache_n_no_create_space; +extern atomic_t fscache_n_culled; #define fscache_count_read() atomic_inc(&fscache_n_read) #define fscache_count_write() atomic_inc(&fscache_n_write) #define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space) #define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space) +#define fscache_count_culled() atomic_inc(&fscache_n_culled) #else #define fscache_count_read() do {} while(0) #define fscache_count_write() do {} while(0) #define fscache_count_no_write_space() do {} while(0) #define fscache_count_no_create_space() do {} while(0) +#define fscache_count_culled() do {} while(0) #endif #endif /* _LINUX_FSCACHE_CACHE_H */ -- cgit v1.2.3 From 2efd61a608b0039911924d2e5d7028eb37496e85 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 10 Dec 2021 16:36:20 +0000 Subject: KVM: Warn if mark_page_dirty() is called without an active vCPU The various kvm_write_guest() and mark_page_dirty() functions must only ever be called in the context of an active vCPU, because if dirty ring tracking is enabled it may simply oops when kvm_get_running_vcpu() returns NULL for the vcpu and then kvm_dirty_ring_get() dereferences it. This oops was reported by "butt3rflyh4ck" in https://lore.kernel.org/kvm/CAFcO6XOmoS7EacN_n6v4Txk7xL7iqRa2gABg3F7E3Naf5uG94g@mail.gmail.com/ That actual bug will be fixed under separate cover but this warning should help to prevent new ones from being added. Signed-off-by: David Woodhouse Message-Id: <20211210163625.2886-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_dirty_ring.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 4da8d4a4140b..906f899813dc 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -43,11 +43,6 @@ static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, return 0; } -static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm) -{ - return NULL; -} - static inline int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring) { @@ -78,7 +73,6 @@ static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) u32 kvm_dirty_ring_get_rsvd_entries(void); int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); -struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm); /* * called with kvm->slots_lock held, returns the number of -- cgit v1.2.3 From 982ed0de4753ed6e71dbd40f82a5a066baf133ed Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 10 Dec 2021 16:36:21 +0000 Subject: KVM: Reinstate gfn_to_pfn_cache with invalidation support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be used in two modes. There is an atomic mode where the cached mapping is accessed while holding the rwlock, and a mode where the physical address is used by a vCPU in guest mode. For the latter case, an invalidation will wake the vCPU with the new KVM_REQ_GPC_INVALIDATE, and the architecture will need to refresh any caches it still needs to access before entering guest mode again. Only one vCPU can be targeted by the wake requests; it's simple enough to make it wake all vCPUs or even a mask but I don't see a use case for that additional complexity right now. Invalidation happens from the invalidate_range_start MMU notifier, which needs to be able to sleep in order to wake the vCPU and wait for it. This means that revalidation potentially needs to "wait" for the MMU operation to complete and the invalidate_range_end notifier to be invoked. Like the vCPU when it takes a page fault in that period, we just spin — fixing that in a future patch by implementing an actual *wait* may be another part of shaving this particularly hirsute yak. As noted in the comments in the function itself, the only case where the invalidate_range_start notifier is expected to be called *without* being able to sleep is when the OOM reaper is killing the process. In that case, we expect the vCPU threads already to have exited, and thus there will be nothing to wake, and no reason to wait. So we clear the KVM_REQUEST_WAIT bit and send the request anyway, then complain loudly if there actually *was* anything to wake up. Signed-off-by: David Woodhouse Message-Id: <20211210163625.2886-3-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 103 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/kvm_types.h | 18 ++++++++ 2 files changed, 121 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f9bbcf519280..9bbb1f1d9e48 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -155,6 +155,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_UNHALT 3 #define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_GPC_INVALIDATE (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQUEST_ARCH_BASE 8 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ @@ -593,6 +594,10 @@ struct kvm { unsigned long mn_active_invalidate_count; struct rcuwait mn_memslots_update_rcuwait; + /* For management / invalidation of gfn_to_pfn_caches */ + spinlock_t gpc_lock; + struct list_head gpc_list; + /* * created_vcpus is protected by kvm->lock, and is incremented * at the beginning of KVM_CREATE_VCPU. online_vcpus is only @@ -1099,6 +1104,104 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); +/** + * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a + * given guest physical address. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. + * @vcpu: vCPU to be used for marking pages dirty and to be woken on + * invalidation. + * @guest_uses_pa: indicates that the resulting host physical PFN is used while + * @vcpu is IN_GUEST_MODE so invalidations should wake it. + * @kernel_map: requests a kernel virtual mapping (kmap / memremap). + * @gpa: guest physical address to map. + * @len: sanity check; the range being access must fit a single page. + * @dirty: mark the cache dirty immediately. + * + * @return: 0 for success. + * -EINVAL for a mapping which would cross a page boundary. + * -EFAULT for an untranslatable guest physical address. + * + * This primes a gfn_to_pfn_cache and links it into the @kvm's list for + * invalidations to be processed. Invalidation callbacks to @vcpu using + * %KVM_REQ_GPC_INVALIDATE will occur only for MMU notifiers, not for KVM + * memslot changes. Callers are required to use kvm_gfn_to_pfn_cache_check() + * to ensure that the cache is valid before accessing the target page. + */ +int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, + struct kvm_vcpu *vcpu, bool guest_uses_pa, + bool kernel_map, gpa_t gpa, unsigned long len, + bool dirty); + +/** + * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. + * @gpa: current guest physical address to map. + * @len: sanity check; the range being access must fit a single page. + * @dirty: mark the cache dirty immediately. + * + * @return: %true if the cache is still valid and the address matches. + * %false if the cache is not valid. + * + * Callers outside IN_GUEST_MODE context should hold a read lock on @gpc->lock + * while calling this function, and then continue to hold the lock until the + * access is complete. + * + * Callers in IN_GUEST_MODE may do so without locking, although they should + * still hold a read lock on kvm->scru for the memslot checks. + */ +bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, + gpa_t gpa, unsigned long len); + +/** + * kvm_gfn_to_pfn_cache_refresh - update a previously initialized cache. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. + * @gpa: updated guest physical address to map. + * @len: sanity check; the range being access must fit a single page. + * @dirty: mark the cache dirty immediately. + * + * @return: 0 for success. + * -EINVAL for a mapping which would cross a page boundary. + * -EFAULT for an untranslatable guest physical address. + * + * This will attempt to refresh a gfn_to_pfn_cache. Note that a successful + * returm from this function does not mean the page can be immediately + * accessed because it may have raced with an invalidation. Callers must + * still lock and check the cache status, as this function does not return + * with the lock still held to permit access. + */ +int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, + gpa_t gpa, unsigned long len, bool dirty); + +/** + * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. + * + * This unmaps the referenced page and marks it dirty, if appropriate. The + * cache is left in the invalid state but at least the mapping from GPA to + * userspace HVA will remain cached and can be reused on a subsequent + * refresh. + */ +void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); + +/** + * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. + * + * This removes a cache from the @kvm's list to be processed on MMU notifier + * invocation. + */ +void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); + void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 888ef12862c9..dceac12c1ce5 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -19,6 +19,7 @@ struct kvm_memslots; enum kvm_mr_change; #include +#include #include @@ -53,6 +54,23 @@ struct gfn_to_hva_cache { struct kvm_memory_slot *memslot; }; +struct gfn_to_pfn_cache { + u64 generation; + gpa_t gpa; + unsigned long uhva; + struct kvm_memory_slot *memslot; + struct kvm_vcpu *vcpu; + struct list_head list; + rwlock_t lock; + void *khva; + kvm_pfn_t pfn; + bool active; + bool valid; + bool dirty; + bool kernel_map; + bool guest_uses_pa; +}; + #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE /* * Memory caches are used to preallocate memory ahead of various MMU flows, -- cgit v1.2.3 From 14243b387137a4afbe1df5d9dc15182d6657bb79 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 10 Dec 2021 16:36:23 +0000 Subject: KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery This adds basic support for delivering 2 level event channels to a guest. Initially, it only supports delivery via the IRQ routing table, triggered by an eventfd. In order to do so, it has a kvm_xen_set_evtchn_fast() function which will use the pre-mapped shared_info page if it already exists and is still valid, while the slow path through the irqfd_inject workqueue will remap the shared_info page if necessary. It sets the bits in the shared_info page but not the vcpu_info; that is deferred to __kvm_xen_has_interrupt() which raises the vector to the appropriate vCPU. Add a 'verbose' mode to xen_shinfo_test while adding test cases for this. Signed-off-by: David Woodhouse Message-Id: <20211210163625.2886-5-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 7 +++++++ include/uapi/linux/kvm.h | 11 +++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9bbb1f1d9e48..3c47b146851a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -497,6 +497,12 @@ struct kvm_hv_sint { u32 sint; }; +struct kvm_xen_evtchn { + u32 port; + u32 vcpu; + u32 priority; +}; + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; @@ -517,6 +523,7 @@ struct kvm_kernel_irq_routing_entry { } msi; struct kvm_s390_adapter_int adapter; struct kvm_hv_sint hv_sint; + struct kvm_xen_evtchn xen_evtchn; }; struct hlist_node link; }; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 469f05d69c8d..fbfd70d965c6 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1163,11 +1163,20 @@ struct kvm_irq_routing_hv_sint { __u32 sint; }; +struct kvm_irq_routing_xen_evtchn { + __u32 port; + __u32 vcpu; + __u32 priority; +}; + +#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1)) + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 +#define KVM_IRQ_ROUTING_XEN_EVTCHN 5 struct kvm_irq_routing_entry { __u32 gsi; @@ -1179,6 +1188,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; struct kvm_irq_routing_hv_sint hv_sint; + struct kvm_irq_routing_xen_evtchn xen_evtchn; __u32 pad[8]; } u; }; @@ -1209,6 +1219,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) struct kvm_xen_hvm_config { __u32 flags; -- cgit v1.2.3 From d92321bbe46b0ecae0941461379d39599610d869 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 7 Jan 2022 16:06:35 +0000 Subject: ASoC: cs35l41: Update handling of test key registers In preparation for the addition of PM runtime support move the test key out of the register patches themselves. This is necessary to allow the test key to be held during cache synchronisation, which is required by the OTP settings which were unpacked from the device and written by the driver. Also whilst at it, the driver uses a mixture of accessing the test key register by name and by address, consistently use the name. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220107160636.6555-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l41.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index 29a527457b48..56289b67b9a0 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -762,6 +762,8 @@ struct cs35l41_otp_map_element_t { extern struct regmap_config cs35l41_regmap_i2c; extern struct regmap_config cs35l41_regmap_spi; +int cs35l41_test_key_unlock(struct device *dev, struct regmap *regmap); +int cs35l41_test_key_lock(struct device *dev, struct regmap *regmap); int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap); int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsigned int reg_revid); int cs35l41_set_channels(struct device *dev, struct regmap *reg, -- cgit v1.2.3 From f517ba4924ad026f2583553db02f3c8bc69de88b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 7 Jan 2022 16:06:36 +0000 Subject: ASoC: cs35l41: Add support for hibernate memory retention mode The cs35l41 supports a low power DSP memory retention mode. Add support for entering this mode when then device is not in use. Co-authored-by: David Rhodes Signed-off-by: David Rhodes Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220107160636.6555-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l41.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index 56289b67b9a0..bf7f9a9aeba0 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -40,6 +40,9 @@ #define CS35L41_PROTECT_REL_ERR_IGN 0x00002034 #define CS35L41_GPIO_PAD_CONTROL 0x0000242C #define CS35L41_JTAG_CONTROL 0x00002438 +#define CS35L41_PWRMGT_CTL 0x00002900 +#define CS35L41_WAKESRC_CTL 0x00002904 +#define CS35L41_PWRMGT_STS 0x00002908 #define CS35L41_PLL_CLK_CTRL 0x00002C04 #define CS35L41_DSP_CLK_CTRL 0x00002C08 #define CS35L41_GLOBAL_CLK_CTRL 0x00002C0C @@ -635,6 +638,8 @@ #define CS35L41_INPUT_DSP_TX1 0x32 #define CS35L41_INPUT_DSP_TX2 0x33 +#define CS35L41_WR_PEND_STS_MASK 0x2 + #define CS35L41_PLL_CLK_SEL_MASK 0x07 #define CS35L41_PLL_CLK_SEL_SHIFT 0 #define CS35L41_PLL_CLK_EN_MASK 0x10 -- cgit v1.2.3 From 44ea62813f0ab3d718de480504f4dfd0bdd01858 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Sep 2021 18:31:40 -0700 Subject: spi: don't include ptp_clock_kernel.h in spi.h Commit b42faeee718c ("spi: Add a PTP system timestamp to the transfer structure") added an include of ptp_clock_kernel.h to spi.h for struct ptp_system_timestamp but a forward declaration is enough. Let's use that to limit the number of objects we have to rebuild every time we touch networking headers. Signed-off-by: Jakub Kicinski Tested-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210904013140.2377609-1-kuba@kernel.org Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index eb7ac8a1e03c..7ab3fed7b804 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -14,12 +14,12 @@ #include #include #include -#include #include struct dma_chan; struct software_node; +struct ptp_system_timestamp; struct spi_controller; struct spi_transfer; struct spi_controller_mem_ops; -- cgit v1.2.3 From 18451db82ef7f943c60a7fce685f16172bda5106 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 6 Jan 2022 13:03:56 -0500 Subject: RDMA/core: Calculate UDP source port based on flow label or lqpn/rqpn Calculate and set UDP source port based on the flow label. If flow label is not defined in GRH then calculate it based on lqpn/rqpn. Link: https://lore.kernel.org/r/20220106180359.2915060-2-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6e9ad656ecb7..69d883f7fb41 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4749,6 +4749,23 @@ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) return (u32)(v & IB_GRH_FLOWLABEL_MASK); } +/** + * rdma_get_udp_sport - Calculate and set UDP source port based on the flow + * label. If flow label is not defined in GRH then + * calculate it based on lqpn/rqpn. + * + * @fl: flow label from GRH + * @lqpn: local qp number + * @rqpn: remote qp number + */ +static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) +{ + if (!fl) + fl = rdma_calc_flow_label(lqpn, rqpn); + + return rdma_flow_label_to_udp_sport(fl); +} + const struct ib_port_immutable* ib_port_immutable_read(struct ib_device *dev, unsigned int port); #endif /* IB_VERBS_H */ -- cgit v1.2.3 From 5cad43a52ee3caf451cd645baa4beb53a1733dae Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 7 Jan 2022 16:42:29 +0200 Subject: net: dsa: felix: add port fast age support Add support for flushing the MAC table on a given port in the ocelot switch library, and use this functionality in the felix DSA driver. This operation is needed when a port leaves a bridge to become standalone, and when the learning is disabled, and when the STP state changes to a state where no FDB entry should be present. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20220107144229.244584-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/soc/mscc/ocelot.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 3e9454b00562..5c3a3597f1d2 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -833,6 +833,7 @@ void ocelot_port_bridge_join(struct ocelot *ocelot, int port, struct net_device *bridge); void ocelot_port_bridge_leave(struct ocelot *ocelot, int port, struct net_device *bridge); +int ocelot_mact_flush(struct ocelot *ocelot, int port); int ocelot_fdb_dump(struct ocelot *ocelot, int port, dsa_fdb_dump_cb_t *cb, void *data); int ocelot_fdb_add(struct ocelot *ocelot, int port, -- cgit v1.2.3 From 3506659e18a61ae525f3b9b4f5af23b4b149d4db Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Nov 2021 14:53:35 -0500 Subject: mm: Add unmap_mapping_folio() Convert both callers of unmap_mapping_page() to call unmap_mapping_folio() instead. Also move zap_details from linux/mm.h to mm/memory.c Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/mm.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 145f045b0ddc..c9cdb26802fb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1825,28 +1825,6 @@ static inline bool can_do_mlock(void) { return false; } extern int user_shm_lock(size_t, struct ucounts *); extern void user_shm_unlock(size_t, struct ucounts *); -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct address_space *zap_mapping; /* Check page->mapping if set */ - struct page *single_page; /* Locked page to be unmapped */ -}; - -/* - * We set details->zap_mappings when we want to unmap shared but keep private - * pages. Return true if skip zapping this page, false otherwise. - */ -static inline bool -zap_skip_check_mapping(struct zap_details *details, struct page *page) -{ - if (!details || !page) - return false; - - return details->zap_mapping && - (details->zap_mapping != page_rmapping(page)); -} - struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -1892,7 +1870,6 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); -void unmap_mapping_page(struct page *page); void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows); void unmap_mapping_range(struct address_space *mapping, @@ -1913,7 +1890,6 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, BUG(); return -EFAULT; } -static inline void unmap_mapping_page(struct page *page) { } static inline void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { } static inline void unmap_mapping_range(struct address_space *mapping, -- cgit v1.2.3 From 1e84a3d997b74c33491899e31d48774f252213ab Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 2 Dec 2021 16:01:55 -0500 Subject: truncate,shmem: Add truncate_inode_folio() Convert all callers of truncate_inode_page() to call truncate_inode_folio() instead, and move the declaration to mm/internal.h. Move the assertion that the caller is not passing in a tail page to generic_error_remove_page(). We can't entirely remove the struct page from the callers yet because the page pointer in the pvec might be a shadow/dax/swap entry instead of actually a page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c9cdb26802fb..d8b7d7ed14dd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1859,7 +1859,6 @@ extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); -int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); int invalidate_inode_page(struct page *page); -- cgit v1.2.3 From 0e499ed3d7a216706e02eeded562627d3e69dcfd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 1 Sep 2020 23:17:50 -0400 Subject: filemap: Return only folios from find_get_entries() The callers have all been converted to work on folios, so convert find_get_entries() to return a batch of folios instead of pages. We also now return multiple large folios in a single call. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jan Kara Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig --- include/linux/pagemap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index eb6e58e106c8..d2259a1da51c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -592,8 +592,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) return head + (index & (thp_nr_pages(head) - 1)); } -unsigned find_get_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct pagevec *pvec, pgoff_t *indices); unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, pgoff_t end, unsigned int nr_pages, struct page **pages); -- cgit v1.2.3 From 51dcbdac28d4dde915f78adf08bb3fac87f516e9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Dec 2021 14:15:07 -0500 Subject: mm: Convert find_lock_entries() to use a folio_batch find_lock_entries() already only returned the head page of folios, so convert it to return a folio_batch instead of a pagevec. That cascades through converting truncate_inode_pages_range() to delete_from_page_cache_batch() and page_cache_delete_batch(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d2259a1da51c..6e038811f4c8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -16,7 +16,7 @@ #include /* for in_interrupt() */ #include -struct pagevec; +struct folio_batch; static inline bool mapping_empty(struct address_space *mapping) { @@ -936,7 +936,7 @@ static inline void __delete_from_page_cache(struct page *page, void *shadow) } void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, - struct pagevec *pvec); + struct folio_batch *fbatch); int try_to_release_page(struct page *page, gfp_t gfp); bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, -- cgit v1.2.3 From 1613fac9aaf840af76faa747ea428a714af98dbd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Dec 2021 14:28:49 -0500 Subject: mm: Remove pagevec_remove_exceptionals() All of its callers now call folio_batch_remove_exceptionals(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagevec.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index c3fa616d7ae7..dda8d5868c81 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -27,7 +27,6 @@ struct pagevec { void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); -void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup_range(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start, pgoff_t end); @@ -146,8 +145,5 @@ static inline void folio_batch_release(struct folio_batch *fbatch) pagevec_release((struct pagevec *)fbatch); } -static inline void folio_batch_remove_exceptionals(struct folio_batch *fbatch) -{ - pagevec_remove_exceptionals((struct pagevec *)fbatch); -} +void folio_batch_remove_exceptionals(struct folio_batch *fbatch); #endif /* _LINUX_PAGEVEC_H */ -- cgit v1.2.3 From 25a8de7f8d970ffa7263bd9d32a08138cd949f17 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 27 Aug 2021 07:21:49 -0400 Subject: XArray: Add xas_advance() Add a new helper function to help iterate over multi-index entries. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/xarray.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index a91e3d90df8a..d6d5da6ed735 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1580,6 +1580,24 @@ static inline void xas_set(struct xa_state *xas, unsigned long index) xas->xa_node = XAS_RESTART; } +/** + * xas_advance() - Skip over sibling entries. + * @xas: XArray operation state. + * @index: Index of last sibling entry. + * + * Move the operation state to refer to the last sibling entry. + * This is useful for loops that normally want to see sibling + * entries but sometimes want to skip them. Use xas_set() if you + * want to move to an index which is not part of this entry. + */ +static inline void xas_advance(struct xa_state *xas, unsigned long index) +{ + unsigned char shift = xas_is_node(xas) ? xas->xa_node->shift : 0; + + xas->xa_index = index; + xas->xa_offset = (index >> shift) & XA_CHUNK_MASK; +} + /** * xas_set_order() - Set up XArray operation state for a multislot entry. * @xas: XArray operation state. -- cgit v1.2.3 From 6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 27 Jun 2020 22:19:08 -0400 Subject: mm: Use multi-index entries in the page cache We currently store large folios as 2^N consecutive entries. While this consumes rather more memory than necessary, it also turns out to be buggy. A writeback operation which starts within a tail page of a dirty folio will not write back the folio as the xarray's dirty bit is only set on the head index. With multi-index entries, the dirty bit will be found no matter where in the folio the operation starts. This does end up simplifying the page cache slightly, although not as much as I had hoped. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/pagemap.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 6e038811f4c8..704cb1b4b15d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1125,16 +1125,6 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; rac->_batch_count += thp_nr_pages(page); - - /* - * The page cache isn't using multi-index entries yet, - * so the xas cursor needs to be manually moved to the - * next index. This can be removed once the page cache - * is converted. - */ - if (PageHead(page)) - xas_set(&xas, rac->_index + rac->_batch_count); - if (i == array_sz) break; } -- cgit v1.2.3 From 50a483405c420f5f35b8dbb71425459835ae44eb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 6 Dec 2021 11:35:06 +0900 Subject: kbuild: move headers_check.pl to usr/include/ This script is only used by usr/include/Makefile. Make it local to the directory. Update the comment in include/uapi/linux/soundcard.h because 'make headers_check' is no longer functional. Signed-off-by: Masahiro Yamada --- include/uapi/linux/soundcard.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/soundcard.h b/include/uapi/linux/soundcard.h index f3b21f989872..ac1318793a86 100644 --- a/include/uapi/linux/soundcard.h +++ b/include/uapi/linux/soundcard.h @@ -1051,7 +1051,7 @@ typedef struct mixer_vol_table { * the GPL version of OSS-4.x and build against that version * of the header. * - * We redefine the extern keyword so that make headers_check + * We redefine the extern keyword so that usr/include/headers_check.pl * does not complain about SEQ_USE_EXTBUF. */ #define SEQ_DECLAREBUF() SEQ_USE_EXTBUF() -- cgit v1.2.3 From e32cf5dfbe227b355776948b2c9b5691b84d1cbd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 22 Dec 2021 22:10:09 -0600 Subject: kthread: Generalize pf_io_worker so it can point to struct kthread The point of using set_child_tid to hold the kthread pointer was that it already did what is necessary. There are now restrictions on when set_child_tid can be initialized and when set_child_tid can be used in schedule_tail. Which indicates that continuing to use set_child_tid to hold the kthread pointer is a bad idea. Instead of continuing to use the set_child_tid field of task_struct generalize the pf_io_worker field of task_struct and use it to hold the kthread pointer. Rename pf_io_worker (which is a void * pointer) to worker_private so it can be used to store kthreads struct kthread pointer. Update the kthread code to store the kthread pointer in the worker_private field. Remove the places where set_child_tid had to be dealt with carefully because kthreads also used it. Link: https://lkml.kernel.org/r/CAHk-=wgtFAA9SbVYg0gR1tqPMC17-NYcs0GQkaYg1bGhh1uJQQ@mail.gmail.com Link: https://lkml.kernel.org/r/87a6grvqy8.fsf_-_@email.froward.int.ebiederm.org Suggested-by: Linus Torvalds Signed-off-by: "Eric W. Biederman" --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 78c351e35fec..52f2fdffa3ab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -987,8 +987,8 @@ struct task_struct { /* CLONE_CHILD_CLEARTID: */ int __user *clear_child_tid; - /* PF_IO_WORKER */ - void *pf_io_worker; + /* PF_KTHREAD | PF_IO_WORKER */ + void *worker_private; u64 utime; u64 stime; -- cgit v1.2.3 From 3367d1bd738c01b2737eaab7d922bfe5f1a41f38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 8 Jan 2022 16:31:58 +0100 Subject: power: supply: Provide stubs for charge_behaviour helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_SYSFS is not enabled provide stubs for the helper functions to not break their callers. Fixes: 539b9c94ac83 ("power: supply: add helpers for charge_behaviour sysfs") Reported-by: kernel test robot Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20220108153158.189489-1-linux@weissschuh.net Signed-off-by: Hans de Goede --- include/linux/power_supply.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 71f0379c2af8..f6b9ed4630fa 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -553,6 +553,21 @@ ssize_t power_supply_charge_behaviour_show(struct device *dev, char *buf); int power_supply_charge_behaviour_parse(unsigned int available_behaviours, const char *buf); +#else +static inline +ssize_t power_supply_charge_behaviour_show(struct device *dev, + unsigned int available_behaviours, + enum power_supply_charge_behaviour behaviour, + char *buf) +{ + return -EOPNOTSUPP; +} + +static inline int power_supply_charge_behaviour_parse(unsigned int available_behaviours, + const char *buf) +{ + return -EOPNOTSUPP; +} #endif #endif /* __LINUX_POWER_SUPPLY_H__ */ -- cgit v1.2.3 From 2f824d4d197e02275562359a2ae5274177ce500c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 8 Jan 2022 09:48:31 -0600 Subject: signal: Remove SIGNAL_GROUP_COREDUMP After the previous cleanups "signal->core_state" is set whenever SIGNAL_GROUP_COREDUMP is set and "signal->core_state" is tested whenver the code wants to know if a coredump is in progress. The remaining tests of SIGNAL_GROUP_COREDUMP also test to see if SIGNAL_GROUP_EXIT is set. Similarly the only place that sets SIGNAL_GROUP_COREDUMP also sets SIGNAL_GROUP_EXIT. Which makes SIGNAL_GROUP_COREDUMP unecessary and redundant. So stop setting SIGNAL_GROUP_COREDUMP, stop testing SIGNAL_GROUP_COREDUMP, and remove it's definition. With the setting of SIGNAL_GROUP_COREDUMP gone, coredump_finish no longer needs to clear SIGNAL_GROUP_COREDUMP out of signal->flags by setting SIGNAL_GROUP_EXIT. Link: https://lkml.kernel.org/r/20211213225350.27481-5-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index fa26d2a58413..ecc10e148799 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -256,7 +256,6 @@ struct signal_struct { #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ -#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ /* * Pending notifications to parent. */ @@ -272,7 +271,7 @@ struct signal_struct { static inline void signal_set_stop_flags(struct signal_struct *sig, unsigned int flags) { - WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + WARN_ON(sig->flags & SIGNAL_GROUP_EXIT); sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; } -- cgit v1.2.3 From 60700e38fb68e800607ca7a027060d5419fc5798 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 6 Jun 2021 13:47:53 -0500 Subject: signal: Rename group_exit_task group_exec_task The only remaining user of group_exit_task is exec. Rename the field so that it is clear which part of the code uses it. Update the comment above the definition of group_exec_task to document how it is currently used. Link: https://lkml.kernel.org/r/20211213225350.27481-7-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ecc10e148799..d3248aba5183 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -109,13 +109,9 @@ struct signal_struct { /* thread group exit support */ int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. - */ + /* notify group_exec_task when notify_count is less or equal to 0 */ int notify_count; - struct task_struct *group_exit_task; + struct task_struct *group_exec_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; @@ -275,11 +271,11 @@ static inline void signal_set_stop_flags(struct signal_struct *sig, sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; } -/* If true, all threads except ->group_exit_task have pending SIGKILL */ +/* If true, all threads except ->group_exec_task have pending SIGKILL */ static inline int signal_group_exit(const struct signal_struct *sig) { return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); + (sig->group_exec_task != NULL); } extern void flush_signals(struct task_struct *); -- cgit v1.2.3 From 49697335e0b441b0553598c1b48ee9ebb053d2f1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 Jun 2021 02:14:30 -0500 Subject: signal: Remove the helper signal_group_exit This helper is misleading. It tests for an ongoing exec as well as the process having received a fatal signal. Sometimes it is appropriate to treat an on-going exec differently than a process that is shutting down due to a fatal signal. In particular taking the fast path out of exit_signals instead of retargeting signals is not appropriate during exec, and not changing the the exit code in do_group_exit during exec. Removing the helper makes it more obvious what is going on as both cases must be coded for explicitly. While removing the helper fix the two cases where I have observed using signal_group_exit resulted in the wrong result. In exit_signals only test for SIGNAL_GROUP_EXIT so that signals are retargetted during an exec. In do_group_exit use 0 as the exit code during an exec as de_thread does not set group_exit_code. As best as I can determine group_exit_code has been is set to 0 most of the time during de_thread. During a thread group stop group_exit_code is set to the stop signal and when the thread group receives SIGCONT group_exit_code is reset to 0. Link: https://lkml.kernel.org/r/20211213225350.27481-8-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index d3248aba5183..b6ecb9fc4cd2 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -271,13 +271,6 @@ static inline void signal_set_stop_flags(struct signal_struct *sig, sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; } -/* If true, all threads except ->group_exec_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exec_task != NULL); -} - extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); -- cgit v1.2.3 From 2d4bcf886e42f0f4846a3d9bdc3a90d278903a2e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 8 Jan 2022 11:23:02 -0600 Subject: exit: Remove profile_task_exit & profile_munmap When I say remove I mean remove. All profile_task_exit and profile_munmap do is call a blocking notifier chain. The helpers profile_task_register and profile_task_unregister are not called anywhere in the tree. Which means this is all dead code. So remove the dead code and make it easier to read do_exit. Reviewed-by: Christoph Hellwig Link: https://lkml.kernel.org/r/20220103213312.9144-1-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/profile.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include') diff --git a/include/linux/profile.h b/include/linux/profile.h index fd18ca96f557..f7eb2b57d890 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -31,11 +31,6 @@ static inline int create_proc_profile(void) } #endif -enum profile_type { - PROFILE_TASK_EXIT, - PROFILE_MUNMAP -}; - #ifdef CONFIG_PROFILING extern int prof_on __read_mostly; @@ -66,23 +61,14 @@ static inline void profile_hit(int type, void *ip) struct task_struct; struct mm_struct; -/* task is in do_exit() */ -void profile_task_exit(struct task_struct * task); - /* task is dead, free task struct ? Returns 1 if * the task was taken, 0 if the task should be freed. */ int profile_handoff_task(struct task_struct * task); -/* sys_munmap */ -void profile_munmap(unsigned long addr); - int task_handoff_register(struct notifier_block * n); int task_handoff_unregister(struct notifier_block * n); -int profile_event_register(enum profile_type, struct notifier_block * n); -int profile_event_unregister(enum profile_type, struct notifier_block * n); - #else #define prof_on 0 @@ -117,19 +103,7 @@ static inline int task_handoff_unregister(struct notifier_block * n) return -ENOSYS; } -static inline int profile_event_register(enum profile_type t, struct notifier_block * n) -{ - return -ENOSYS; -} - -static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n) -{ - return -ENOSYS; -} - -#define profile_task_exit(a) do { } while (0) #define profile_handoff_task(a) (0) -#define profile_munmap(a) do { } while (0) #endif /* CONFIG_PROFILING */ -- cgit v1.2.3 From 2873cd31a20c25b5e763b35e5fb886f0938c6dd5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 8 Jan 2022 10:03:24 -0600 Subject: exit: Remove profile_handoff_task All profile_handoff_task does is notify the task_free_notifier chain. The helpers task_handoff_register and task_handoff_unregister are used to add and delete entries from that chain and are never called. So remove the dead code and make it much easier to read and reason about __put_task_struct. Suggested-by: Al Viro Link: https://lkml.kernel.org/r/87fspyw6m0.fsf@email.froward.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- include/linux/profile.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/profile.h b/include/linux/profile.h index f7eb2b57d890..11db1ec516e2 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -61,14 +61,6 @@ static inline void profile_hit(int type, void *ip) struct task_struct; struct mm_struct; -/* task is dead, free task struct ? Returns 1 if - * the task was taken, 0 if the task should be freed. - */ -int profile_handoff_task(struct task_struct * task); - -int task_handoff_register(struct notifier_block * n); -int task_handoff_unregister(struct notifier_block * n); - #else #define prof_on 0 @@ -93,17 +85,6 @@ static inline void profile_hit(int type, void *ip) return; } -static inline int task_handoff_register(struct notifier_block * n) -{ - return -ENOSYS; -} - -static inline int task_handoff_unregister(struct notifier_block * n) -{ - return -ENOSYS; -} - -#define profile_handoff_task(a) (0) #endif /* CONFIG_PROFILING */ -- cgit v1.2.3 From 4264178416cd52a55a3eccbefb3973866e060280 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 20 Dec 2021 16:28:53 -0600 Subject: ptrace: Remove unused regs argument from ptrace_report_syscall Link: https://lkml.kernel.org/r/20220103213312.9144-7-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/tracehook.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 2564b7434b4d..88c007ab5ebc 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -54,8 +54,7 @@ struct linux_binprm; /* * ptrace report for syscall entry and exit looks identical. */ -static inline int ptrace_report_syscall(struct pt_regs *regs, - unsigned long message) +static inline int ptrace_report_syscall(unsigned long message) { int ptrace = current->ptrace; @@ -102,7 +101,7 @@ static inline int ptrace_report_syscall(struct pt_regs *regs, static inline __must_check int tracehook_report_syscall_entry( struct pt_regs *regs) { - return ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_ENTRY); + return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); } /** @@ -127,7 +126,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) if (step) user_single_step_report(regs); else - ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_EXIT); + ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT); } /** -- cgit v1.2.3 From 40595cdc93edf4110c0f0c0b06f8d82008f23929 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 16 Dec 2021 12:20:13 -0500 Subject: nfs: block notification on fs with its own ->lock NFSv4.1 supports an optional lock notification feature which notifies the client when a lock comes available. (Normally NFSv4 clients just poll for locks if necessary.) To make that work, we need to request a blocking lock from the filesystem. We turned that off for NFS in commit f657f8eef3ff ("nfs: don't atempt blocking locks on nfs reexports") [sic] because it actually blocks the nfsd thread while waiting for the lock. Thanks to Vasily Averin for pointing out that NFS isn't the only filesystem with that problem. Any filesystem that leaves ->lock NULL will use posix_lock_file(), which does the right thing. Simplest is just to assume that any filesystem that defines its own ->lock is not safe to request a blocking lock from. So, this patch mostly reverts commit f657f8eef3ff ("nfs: don't atempt blocking locks on nfs reexports") [sic] and commit b840be2f00c0 ("lockd: don't attempt blocking locks on nfs reexports"), and instead uses a check of ->lock (Vasily's suggestion) to decide whether to support blocking lock notifications on a given filesystem. Also add a little documentation. Perhaps someday we could add back an export flag later to allow filesystems with "good" ->lock methods to support blocking lock notifications. Reported-by: Vasily Averin Signed-off-by: J. Bruce Fields [ cel: Description rewritten to address checkpatch nits ] [ cel: Fixed warning when SUNRPC debugging is disabled ] [ cel: Fixed NULL check ] Signed-off-by: Chuck Lever Reviewed-by: Vasily Averin --- include/linux/exportfs.h | 2 -- include/linux/lockd/lockd.h | 9 +++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3260fe714846..fe848901fcc3 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -221,8 +221,6 @@ struct export_operations { #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply atomic attribute updates */ -#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do - asychronous blocking locks */ unsigned long flags; }; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c4ae6506b8b3..fcef192e5e45 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -303,10 +303,15 @@ void nlmsvc_invalidate_all(void); int nlmsvc_unlock_all_by_sb(struct super_block *sb); int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); +static inline struct file *nlmsvc_file_file(struct nlm_file *file) +{ + return file->f_file[O_RDONLY] ? + file->f_file[O_RDONLY] : file->f_file[O_WRONLY]; +} + static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { - return locks_inode(file->f_file[O_RDONLY] ? - file->f_file[O_RDONLY] : file->f_file[O_WRONLY]); + return locks_inode(nlmsvc_file_file(file)); } static inline int __nlm_privileged_request4(const struct sockaddr *sap) -- cgit v1.2.3 From 7d30198ee24f2ddcc4fefcd38a9b76bd8ab31360 Mon Sep 17 00:00:00 2001 From: Andrew Zaborowski Date: Tue, 9 Nov 2021 16:16:49 +0100 Subject: keys: X.509 public key issuer lookup without AKID There are non-root X.509 v3 certificates in use out there that contain no Authority Key Identifier extension (RFC5280 section 4.2.1.1). For trust verification purposes the kernel asymmetric key type keeps two struct asymmetric_key_id instances that the key can be looked up by, and another two to look up the key's issuer. The x509 public key type and the PKCS7 type generate them from the SKID and AKID extensions in the certificate. In effect current code has no way to look up the issuer certificate for verification without the AKID. To remedy this, add a third asymmetric_key_id blob to the arrays in both asymmetric_key_id's (for certficate subject) and in the public_keys_signature's auth_ids (for issuer lookup), using just raw subject and issuer DNs from the certificate. Adapt asymmetric_key_ids() and its callers to use the third ID for lookups when none of the other two are available. Attempt to keep the logic intact when they are, to minimise behaviour changes. Adapt the restrict functions' NULL-checks to include that ID too. Do not modify the lookup logic in pkcs7_verify.c, the AKID extensions are still required there. Internally use a new "dn:" prefix to the search specifier string generated for the key lookup in find_asymmetric_key(). This tells asymmetric_key_match_preparse to only match the data against the raw DN in the third ID and shouldn't conflict with search specifiers already in use. In effect implement what (2) in the struct asymmetric_key_id comment (include/keys/asymmetric-type.h) is probably talking about already, so do not modify that comment. It is also how "openssl verify" looks up issuer certificates without the AKID available. Lookups by the raw DN are unambiguous only provided that the CAs respect the condition in RFC5280 4.2.1.1 that the AKID may only be omitted if the CA uses a single signing key. The following is an example of two things that this change enables. A self-signed ceritficate is generated following the example from https://letsencrypt.org/docs/certificates-for-localhost/, and can be looked up by an identifier and verified against itself by linking to a restricted keyring -- both things not possible before due to the missing AKID extension: $ openssl req -x509 -out localhost.crt -outform DER -keyout localhost.key \ -newkey rsa:2048 -nodes -sha256 \ -subj '/CN=localhost' -extensions EXT -config <( \ echo -e "[dn]\nCN=localhost\n[req]\ndistinguished_name = dn\n[EXT]\n" \ "subjectAltName=DNS:localhost\nkeyUsage=digitalSignature\n" \ "extendedKeyUsage=serverAuth") $ keyring=`keyctl newring test @u` $ trusted=`keyctl padd asymmetric trusted $keyring < localhost.crt`; \ echo $trusted 39726322 $ keyctl search $keyring asymmetric dn:3112301006035504030c096c6f63616c686f7374 39726322 $ keyctl restrict_keyring $keyring asymmetric key_or_keyring:$trusted $ keyctl padd asymmetric verified $keyring < localhost.crt Signed-off-by: Andrew Zaborowski Reviewed-by: Jarkko Sakkinen Acked-by: Jarkko Sakkinen Acked-by: David Howells Signed-off-by: Jarkko Sakkinen --- include/crypto/public_key.h | 2 +- include/keys/asymmetric-type.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h index f603325c0c30..68f7aa2a7e55 100644 --- a/include/crypto/public_key.h +++ b/include/crypto/public_key.h @@ -36,7 +36,7 @@ extern void public_key_free(struct public_key *key); * Public key cryptography signature data */ struct public_key_signature { - struct asymmetric_key_id *auth_ids[2]; + struct asymmetric_key_id *auth_ids[3]; u8 *s; /* Signature */ u8 *digest; u32 s_size; /* Number of bytes in signature */ diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h index c432fdb8547f..6c5d4963e15b 100644 --- a/include/keys/asymmetric-type.h +++ b/include/keys/asymmetric-type.h @@ -53,7 +53,7 @@ struct asymmetric_key_id { }; struct asymmetric_key_ids { - void *id[2]; + void *id[3]; }; extern bool asymmetric_key_id_same(const struct asymmetric_key_id *kid1, @@ -81,6 +81,7 @@ const struct public_key *asymmetric_key_public_key(const struct key *key) extern struct key *find_asymmetric_key(struct key *keyring, const struct asymmetric_key_id *id_0, const struct asymmetric_key_id *id_1, + const struct asymmetric_key_id *id_2, bool partial); /* -- cgit v1.2.3 From 0aa698787aa2a9e8840987e54ba2982559de6404 Mon Sep 17 00:00:00 2001 From: axelj Date: Mon, 13 Dec 2021 08:09:25 +0100 Subject: tpm: Add Upgrade/Reduced mode support for TPM2 modules If something went wrong during the TPM firmware upgrade, like power failure or the firmware image file get corrupted, the TPM might end up in Upgrade or Failure mode upon the next start. The state is persistent between the TPM power cycle/restart. According to TPM specification: * If the TPM is in Upgrade mode, it will answer with TPM2_RC_UPGRADE to all commands except TPM2_FieldUpgradeData(). It may also accept other commands if it is able to complete them using the previously installed firmware. * If the TPM is in Failure mode, it will allow performing TPM initialization but will not provide any crypto operations. Will happily respond to Field Upgrade calls. Change the behavior of the tpm2_auto_startup(), so it detects the active running mode of the TPM by adding the following checks. If tpm2_do_selftest() call returns TPM2_RC_UPGRADE, the TPM is in Upgrade mode. If the TPM is in Failure mode, it will successfully respond to both tpm2_do_selftest() and tpm2_startup() calls. Although, will fail to answer to tpm2_get_cc_attrs_tbl(). Use this fact to conclude that TPM is in Failure mode. If detected that the TPM is in the Upgrade or Failure mode, the function sets TPM_CHIP_FLAG_FIRMWARE_UPGRADE_MODE flag. The TPM_CHIP_FLAG_FIRMWARE_UPGRADE_MODE flag is used later during driver initialization/deinitialization to disable functionality which makes no sense or will fail in the current TPM state. Following functionality is affected: * Do not register TPM as a hwrng * Do not register sysfs entries which provide information impossible to obtain in limited mode * Do not register resource managed character device Signed-off-by: axelj Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 12d827734686..dfeb25a0362d 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -207,6 +207,7 @@ enum tpm2_return_codes { TPM2_RC_INITIALIZE = 0x0100, /* RC_VER1 */ TPM2_RC_FAILURE = 0x0101, TPM2_RC_DISABLED = 0x0120, + TPM2_RC_UPGRADE = 0x012D, TPM2_RC_COMMAND_CODE = 0x0143, TPM2_RC_TESTING = 0x090A, /* RC_WARN */ TPM2_RC_REFERENCE_H0 = 0x0910, @@ -278,6 +279,7 @@ enum tpm_chip_flags { TPM_CHIP_FLAG_HAVE_TIMEOUTS = BIT(4), TPM_CHIP_FLAG_ALWAYS_POWERED = BIT(5), TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED = BIT(6), + TPM_CHIP_FLAG_FIRMWARE_UPGRADE = BIT(7), }; #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev) @@ -399,6 +401,14 @@ static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value) tpm_buf_append(buf, (u8 *) &value2, 4); } +/* + * Check if TPM device is in the firmware upgrade mode. + */ +static inline bool tpm_is_firmware_upgrade(struct tpm_chip *chip) +{ + return chip->flags & TPM_CHIP_FLAG_FIRMWARE_UPGRADE; +} + static inline u32 tpm2_rc_value(u32 rc) { return (rc & BIT(7)) ? rc & 0xff : rc; -- cgit v1.2.3 From 292c33c95defd0b814fec1fc8cd60d16556cf7b8 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 7 Jan 2022 08:52:28 +0800 Subject: block: fix old-style declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the 'inline' keyword to the front of 'void'. Remove a warning found by clang(make W=1 LLVM=1) ./include/linux/blk-mq.h:259:1: warning: ‘inline’ is not at beginning of declaration Reported-by: Abaci Robot Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20220107005228.103927-1-yang.lee@linux.alibaba.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f40a05ecca4a..d319ffa59354 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -256,7 +256,7 @@ static inline unsigned short req_get_ioprio(struct request *req) * @rq: The request to move * @prev: The request preceding @rq in @src (NULL if @rq is the head) */ -static void inline rq_list_move(struct request **src, struct request **dst, +static inline void rq_list_move(struct request **src, struct request **dst, struct request *rq, struct request *prev) { if (prev) -- cgit v1.2.3 From 0ea9fc15b1d7d6636d429e74ffe3f86bf2f2f7d6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Nov 2021 17:05:07 +0100 Subject: fs/locks: fix fcntl_getlk64/fcntl_setlk64 stub prototypes My patch to rework oabi fcntl64() introduced a harmless sparse warning when file locking is disabled: arch/arm/kernel/sys_oabi-compat.c:251:51: sparse: sparse: incorrect type in argument 3 (different address spaces) @@ expected struct flock64 [noderef] __user *user @@ got struct flock64 * @@ arch/arm/kernel/sys_oabi-compat.c:251:51: sparse: expected struct flock64 [noderef] __user *user arch/arm/kernel/sys_oabi-compat.c:251:51: sparse: got struct flock64 * arch/arm/kernel/sys_oabi-compat.c:265:55: sparse: sparse: incorrect type in argument 4 (different address spaces) @@ expected struct flock64 [noderef] __user *user @@ got struct flock64 * @@ arch/arm/kernel/sys_oabi-compat.c:265:55: sparse: expected struct flock64 [noderef] __user *user arch/arm/kernel/sys_oabi-compat.c:265:55: sparse: got struct flock64 * When file locking is enabled, everything works correctly and the right data gets passed, but the stub declarations in linux/fs.h did not get modified when the calling conventions changed in an earlier patch. Reported-by: kernel test robot Fixes: 7e2d8c29ecdd ("ARM: 9111/1: oabi-compat: rework fcntl64() emulation") Fixes: a75d30c77207 ("fs/locks: pass kernel struct flock to fcntl_getlk/setlk") Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner Signed-off-by: Arnd Bergmann Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bbf812ce89a8..5122d13775c2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1220,13 +1220,13 @@ static inline int fcntl_setlk(unsigned int fd, struct file *file, #if BITS_PER_LONG == 32 static inline int fcntl_getlk64(struct file *file, unsigned int cmd, - struct flock64 __user *user) + struct flock64 *user) { return -EINVAL; } static inline int fcntl_setlk64(unsigned int fd, struct file *file, - unsigned int cmd, struct flock64 __user *user) + unsigned int cmd, struct flock64 *user) { return -EACCES; } -- cgit v1.2.3 From 613a0c67d12f33dcbeec2836f5fe60d05b4c18c0 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Sun, 26 Dec 2021 01:12:41 +0800 Subject: netfilter: conntrack: Use max() instead of doing it manually Fix following coccicheck warning: ./include/net/netfilter/nf_conntrack.h:282:16-17: WARNING opportunity for max(). Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 871489df63c6..a4a14f3a5e38 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -279,7 +279,7 @@ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; - return timeout > 0 ? timeout : 0; + return max(timeout, 0); } static inline bool nf_ct_is_expired(const struct nf_conn *ct) -- cgit v1.2.3 From 719774377622bc4025d2a74f551b5dc2158c6c30 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 05:03:22 +0100 Subject: netfilter: conntrack: convert to refcount_t api Convert nf_conn reference counting from atomic_t to refcount_t based api. refcount_t api provides more runtime sanity checks and will warn on certain constructs, e.g. refcount_inc() on a zero reference count, which usually indicates use-after-free. For this reason template allocation is changed to init the refcount to 1, the subsequenct add operations are removed. Likewise, init_conntrack() is changed to set the initial refcount to 1 instead refcount_inc(). This is safe because the new entry is not (yet) visible to other cpus. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 700ea077ce2d..a03f7a80b9ab 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -2,7 +2,7 @@ #ifndef _NF_CONNTRACK_COMMON_H #define _NF_CONNTRACK_COMMON_H -#include +#include #include struct ip_conntrack_stat { @@ -25,19 +25,19 @@ struct ip_conntrack_stat { #define NFCT_PTRMASK ~(NFCT_INFOMASK) struct nf_conntrack { - atomic_t use; + refcount_t use; }; void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) { - if (nfct && atomic_dec_and_test(&nfct->use)) + if (nfct && refcount_dec_and_test(&nfct->use)) nf_conntrack_destroy(nfct); } static inline void nf_conntrack_get(struct nf_conntrack *nfct) { if (nfct) - atomic_inc(&nfct->use); + refcount_inc(&nfct->use); } #endif /* _NF_CONNTRACK_COMMON_H */ -- cgit v1.2.3 From 3fce16493dc1aa2c9af3d7e7bd360dfe203a3e6a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 05:03:23 +0100 Subject: netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook ip_ct_attach predates struct nf_ct_hook, we can place it there and remove the exported symbol. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 3fda1a508733..e0e3f3355ab1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -440,7 +440,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include -extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; void nf_ct_attach(struct sk_buff *, const struct sk_buff *); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, @@ -463,6 +462,7 @@ struct nf_ct_hook { void (*destroy)(struct nf_conntrack *); bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); + void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); }; extern struct nf_ct_hook __rcu *nf_ct_hook; -- cgit v1.2.3 From 285c8a7a58158cb1805c97ff03875df2ba2ea1fe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 05:03:24 +0100 Subject: netfilter: make function op structures const No functional changes, these structures should be const. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index e0e3f3355ab1..15e71bfff726 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -381,13 +381,13 @@ struct nf_nat_hook { enum ip_conntrack_dir dir); }; -extern struct nf_nat_hook __rcu *nf_nat_hook; +extern const struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #if IS_ENABLED(CONFIG_NF_NAT) - struct nf_nat_hook *nat_hook; + const struct nf_nat_hook *nat_hook; rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); @@ -464,7 +464,7 @@ struct nf_ct_hook { const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); }; -extern struct nf_ct_hook __rcu *nf_ct_hook; +extern const struct nf_ct_hook __rcu *nf_ct_hook; struct nlattr; @@ -479,7 +479,7 @@ struct nfnl_ct_hook { void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); }; -extern struct nfnl_ct_hook __rcu *nfnl_ct_hook; +extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; /** * nf_skb_duplicated - TEE target has sent a packet -- cgit v1.2.3 From 6ae7989c9af0d98ab64196f4f4c6f6499454bd23 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 05:03:25 +0100 Subject: netfilter: conntrack: avoid useless indirection during conntrack destruction nf_ct_put() results in a usesless indirection: nf_ct_put -> nf_conntrack_put -> nf_conntrack_destroy -> rcu readlock + indirect call of ct_hooks->destroy(). There are two _put helpers: nf_ct_put and nf_conntrack_put. The latter is what should be used in code that MUST NOT cause a linker dependency on the conntrack module (e.g. calls from core network stack). Everyone else should call nf_ct_put() instead. A followup patch will convert a few nf_conntrack_put() calls to nf_ct_put(), in particular from modules that already have a conntrack dependency such as act_ct or even nf_conntrack itself. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 2 ++ include/net/netfilter/nf_conntrack.h | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index a03f7a80b9ab..2770db2fa080 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -29,6 +29,8 @@ struct nf_conntrack { }; void nf_conntrack_destroy(struct nf_conntrack *nfct); + +/* like nf_ct_put, but without module dependency on nf_conntrack */ static inline void nf_conntrack_put(struct nf_conntrack *nfct) { if (nfct && refcount_dec_and_test(&nfct->use)) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a4a14f3a5e38..8731d5bcb47d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -76,6 +76,8 @@ struct nf_conn { * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, + * except that the latter uses internal indirection and does not + * result in a conntrack module dependency. * beware nf_ct_get() is different and don't inc refcnt. */ struct nf_conntrack ct_general; @@ -170,11 +172,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) return (struct nf_conn *)(nfct & NFCT_PTRMASK); } +void nf_ct_destroy(struct nf_conntrack *nfct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { - WARN_ON(!ct); - nf_conntrack_put(&ct->ct_general); + if (ct && refcount_dec_and_test(&ct->ct_general.use)) + nf_ct_destroy(&ct->ct_general); } /* Protocol module loading */ -- cgit v1.2.3 From 6316136ec6e3dd1c302f7e7289a9ee46ecc610ae Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 15:46:16 +0100 Subject: netfilter: egress: avoid a lockdep splat include/linux/netfilter_netdev.h:97 suspicious rcu_dereference_check() usage! 2 locks held by sd-resolve/1100: 0: ..(rcu_read_lock_bh){1:3}, at: ip_finish_output2 1: ..(rcu_read_lock_bh){1:3}, at: __dev_queue_xmit __dev_queue_xmit+0 .. The helper has two callers, one uses rcu_read_lock, the other rcu_read_lock_bh(). Annotate the dereference to reflect this. Fixes: 42df6e1d221dd ("netfilter: Introduce egress hook") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_netdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index b71b57a83bb4..b4dd96e4dc8d 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -94,7 +94,7 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, return skb; #endif - e = rcu_dereference(dev->nf_hooks_egress); + e = rcu_dereference_check(dev->nf_hooks_egress, rcu_read_lock_bh_held()); if (!e) return skb; -- cgit v1.2.3 From 2c865a8a28a10e9800a3dd07ca339d24563e3d65 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 9 Jan 2022 17:11:19 +0100 Subject: netfilter: nf_tables: add rule blob layout This patch adds a blob layout per chain to represent the ruleset in the packet datapath. size (unsigned long) struct nft_rule_dp struct nft_expr ... struct nft_rule_dp struct nft_expr ... struct nft_rule_dp (is_last=1) The new structure nft_rule_dp represents the rule in a more compact way (smaller memory footprint) compared to the control-plane nft_rule structure. The ruleset blob is a read-only data structure. The first field contains the blob size, then the rules containing expressions. There is a trailing rule which is used by the tracing infrastructure which is equivalent to the NULL rule marker in the previous representation. The blob size field does not include the size of this trailing rule marker. The ruleset blob is generated from the commit path. This patch reuses the infrastructure available since 0cbc06b3faba ("netfilter: nf_tables: remove synchronize_rcu in commit phase") to build the array of rules per chain. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a0d9e0b47ab8..5a046b01bdab 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -974,6 +974,20 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, #define NFT_CHAIN_POLICY_UNSET U8_MAX +struct nft_rule_dp { + u64 is_last:1, + dlen:12, + handle:42; /* for tracing */ + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +struct nft_rule_blob { + unsigned long size; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_rule_dp)))); +}; + /** * struct nft_chain - nf_tables chain * @@ -987,8 +1001,8 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, * @name: name of the chain */ struct nft_chain { - struct nft_rule *__rcu *rules_gen_0; - struct nft_rule *__rcu *rules_gen_1; + struct nft_rule_blob __rcu *blob_gen_0; + struct nft_rule_blob __rcu *blob_gen_1; struct list_head rules; struct list_head list; struct rhlist_head rhlhead; @@ -1003,7 +1017,7 @@ struct nft_chain { u8 *udata; /* Only used during control plane commit phase: */ - struct nft_rule **rules_next; + struct nft_rule_blob *blob_next; }; int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); @@ -1321,7 +1335,7 @@ struct nft_traceinfo { const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; - const struct nft_rule *rule; + const struct nft_rule_dp *rule; const struct nft_verdict *verdict; enum nft_trace_types type; bool packet_dumped; -- cgit v1.2.3 From 642c8eff5c6099dfde386ca3906fa55dc98f9ade Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 9 Jan 2022 17:11:20 +0100 Subject: netfilter: nf_tables: add NFT_REG32_NUM Add a definition including the maximum number of 32-bits registers that are used a scratchpad memory area to store data. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5a046b01bdab..515e5db97e01 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -105,6 +105,8 @@ struct nft_data { }; } __attribute__((aligned(__alignof__(u64)))); +#define NFT_REG32_NUM 20 + /** * struct nft_regs - nf_tables register set * @@ -115,7 +117,7 @@ struct nft_data { */ struct nft_regs { union { - u32 data[20]; + u32 data[NFT_REG32_NUM]; struct nft_verdict verdict; }; }; -- cgit v1.2.3 From 12e4ecfa244be2f117ef5304d2d866b65e70bff3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 9 Jan 2022 17:11:21 +0100 Subject: netfilter: nf_tables: add register tracking infrastructure This patch adds new infrastructure to skip redundant selector store operations on the same register to achieve a performance boost from the packet path. This is particularly noticeable in pure linear rulesets but it also helps in rulesets which are already heaving relying in maps to avoid ruleset linear inspection. The idea is to keep data of the most recurrent store operations on register to reuse them with cmp and lookup expressions. This infrastructure allows for dynamic ruleset updates since the ruleset blob reduction happens from the kernel. Userspace still needs to be updated to maximize register utilization to cooperate to improve register data reuse / reduce number of store on register operations. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 515e5db97e01..1c37ce61daea 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -122,6 +122,16 @@ struct nft_regs { }; }; +struct nft_regs_track { + struct { + const struct nft_expr *selector; + const struct nft_expr *bitwise; + } regs[NFT_REG32_NUM]; + + const struct nft_expr *cur; + const struct nft_expr *last; +}; + /* Store/load an u8, u16 or u64 integer to/from the u32 data register. * * Note, when using concatenations, register allocation happens at 32-bit @@ -886,6 +896,8 @@ struct nft_expr_ops { int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); + bool (*reduce)(struct nft_regs_track *track, + const struct nft_expr *expr); bool (*gc)(struct net *net, const struct nft_expr *expr); int (*offload)(struct nft_offload_ctx *ctx, -- cgit v1.2.3 From be5650f8f47e8cffbbbcad08b71103685e971f20 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 9 Jan 2022 17:11:24 +0100 Subject: netfilter: nft_bitwise: track register operations Check if the destination register already contains the data that this bitwise expression performs. This allows to skip this redundant operation. If the destination contains a different bitwise operation, cancel the register tracking information. If the destination contains no bitwise operation, update the register tracking information. Update the payload and meta expression to check if this bitwise operation has been already performed on the register. Hence, both the payload/meta and the bitwise expressions are reduced. There is also a special case: If source register != destination register and source register is not updated by a previous bitwise operation, then transfer selector from the source register to the destination register. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1c37ce61daea..eaf55da9a205 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -358,6 +358,8 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); +bool nft_expr_reduce_bitwise(struct nft_regs_track *track, + const struct nft_expr *expr); struct nft_set_ext; -- cgit v1.2.3 From 6f022c2ddbcefaee79502ce5386dfe351d457070 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 6 Jan 2022 17:38:04 +0200 Subject: net: openvswitch: Fix ct_state nat flags for conns arriving from tc Netfilter conntrack maintains NAT flags per connection indicating whether NAT was configured for the connection. Openvswitch maintains NAT flags on the per packet flow key ct_state field, indicating whether NAT was actually executed on the packet. When a packet misses from tc to ovs the conntrack NAT flags are set. However, NAT was not necessarily executed on the packet because the connection's state might still be in NEW state. As such, openvswitch wrongly assumes that NAT was executed and sets an incorrect flow key NAT flags. Fix this, by flagging to openvswitch which NAT was actually done in act_ct via tc_skb_ext and tc_skb_cb to the openvswitch module, so the packet flow key NAT flags will be correctly set. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: Paul Blakey Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20220106153804.26451-1-paulb@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 +++- include/net/pkt_sched.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4507d77d6941..60ab0c2fe567 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -287,7 +287,9 @@ struct tc_skb_ext { __u32 chain; __u16 mru; __u16 zone; - bool post_ct; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; }; #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9e71691c491b..9e7b21c0b3a6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -197,7 +197,9 @@ struct tc_skb_cb { struct qdisc_skb_cb qdisc_cb; u16 mru; - bool post_ct; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; u16 zone; /* Only valid if post_ct = true */ }; -- cgit v1.2.3 From c504e5c2f9648a1e5c2be01e8c3f59d394192bd3 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 9 Jan 2022 14:36:26 +0800 Subject: net: skb: introduce kfree_skb_reason() Introduce the interface kfree_skb_reason(), which is able to pass the reason why the skb is dropped to 'kfree_skb' tracepoint. Add the 'reason' field to 'trace_kfree_skb', therefor user can get more detail information about abnormal skb with 'drop_monitor' or eBPF. All drop reasons are defined in the enum 'skb_drop_reason', and they will be print as string in 'kfree_skb' tracepoint in format of 'reason: XXX'. ( Maybe the reasons should be defined in a uapi header file, so that user space can use them? ) Signed-off-by: Menglong Dong Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 23 ++++++++++++++++++++++- include/trace/events/skb.h | 36 +++++++++++++++++++++++++++++------- 2 files changed, 51 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 642acb0d1646..ef0870abc791 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -305,6 +305,17 @@ struct sk_buff_head { struct sk_buff; +/* The reason of skb drop, which is used in kfree_skb_reason(). + * en...maybe they should be splited by group? + * + * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is + * used to translate the reason to string. + */ +enum skb_drop_reason { + SKB_DROP_REASON_NOT_SPECIFIED, + SKB_DROP_REASON_MAX, +}; + /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. @@ -1085,8 +1096,18 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); + +/** + * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason + * @skb: buffer to free + */ +static inline void kfree_skb(struct sk_buff *skb) +{ + kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); +} + void skb_release_head_state(struct sk_buff *skb); -void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 9e92f22eb086..294c61bbe44b 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -9,29 +9,51 @@ #include #include +#define TRACE_SKB_DROP_REASON \ + EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ + EMe(SKB_DROP_REASON_MAX, MAX) + +#undef EM +#undef EMe + +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +TRACE_SKB_DROP_REASON + +#undef EM +#undef EMe +#define EM(a, b) { a, #b }, +#define EMe(a, b) { a, #b } + /* * Tracepoint for free an sk_buff: */ TRACE_EVENT(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location), + TP_PROTO(struct sk_buff *skb, void *location, + enum skb_drop_reason reason), - TP_ARGS(skb, location), + TP_ARGS(skb, location, reason), TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( void *, location ) - __field( unsigned short, protocol ) + __field(void *, skbaddr) + __field(void *, location) + __field(unsigned short, protocol) + __field(enum skb_drop_reason, reason) ), TP_fast_assign( __entry->skbaddr = skb; __entry->location = location; __entry->protocol = ntohs(skb->protocol); + __entry->reason = reason; ), - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) + TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", + __entry->skbaddr, __entry->protocol, __entry->location, + __print_symbolic(__entry->reason, + TRACE_SKB_DROP_REASON)) ); TRACE_EVENT(consume_skb, -- cgit v1.2.3 From 85125597419aec3aa7b8f3b8713e415f997796f2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 9 Jan 2022 14:36:27 +0800 Subject: net: skb: use kfree_skb_reason() in tcp_v4_rcv() Replace kfree_skb() with kfree_skb_reason() in tcp_v4_rcv(). Following drop reasons are added: SKB_DROP_REASON_NO_SOCKET SKB_DROP_REASON_PKT_TOO_SMALL SKB_DROP_REASON_TCP_CSUM SKB_DROP_REASON_TCP_FILTER After this patch, 'kfree_skb' event will print message like this: $ TASK-PID CPU# ||||| TIMESTAMP FUNCTION $ | | | ||||| | | -0 [000] ..s1. 36.113438: kfree_skb: skbaddr=(____ptrval____) protocol=2048 location=(____ptrval____) reason: NO_SOCKET The reason of skb drop is printed too. Signed-off-by: Menglong Dong Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++++ include/trace/events/skb.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ef0870abc791..c9c97b0d0fe9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -313,6 +313,10 @@ struct sk_buff; */ enum skb_drop_reason { SKB_DROP_REASON_NOT_SPECIFIED, + SKB_DROP_REASON_NO_SOCKET, + SKB_DROP_REASON_PKT_TOO_SMALL, + SKB_DROP_REASON_TCP_CSUM, + SKB_DROP_REASON_TCP_FILTER, SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 294c61bbe44b..faa7d068a7bc 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -11,6 +11,10 @@ #define TRACE_SKB_DROP_REASON \ EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ + EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ + EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ + EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ + EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 1c7fab70df085d866a3765955f397ca2b4025b15 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 9 Jan 2022 14:36:28 +0800 Subject: net: skb: use kfree_skb_reason() in __udp4_lib_rcv() Replace kfree_skb() with kfree_skb_reason() in __udp4_lib_rcv. New drop reason 'SKB_DROP_REASON_UDP_CSUM' is added for udp csum error. Signed-off-by: Menglong Dong Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9c97b0d0fe9..af64c7de9b53 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -317,6 +317,7 @@ enum skb_drop_reason { SKB_DROP_REASON_PKT_TOO_SMALL, SKB_DROP_REASON_TCP_CSUM, SKB_DROP_REASON_TCP_FILTER, + SKB_DROP_REASON_UDP_CSUM, SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index faa7d068a7bc..3e042ca2cedb 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -15,6 +15,7 @@ EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ + EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM -- cgit v1.2.3 From 1c582c6dc4244d88f702dc3afd5b47225332edf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 3 Nov 2021 20:38:21 +0100 Subject: 9p/trans_fd: split into dedicated module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows these transports only to be used when needed. Link: https://lkml.kernel.org/r/20211103193823.111007-3-linux@weissschuh.net Signed-off-by: Thomas Weißschuh [Dominique: Kconfig NET_9P_FD: -depends VIRTIO, +default NET_9P] Signed-off-by: Dominique Martinet --- include/net/9p/9p.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 9c6ec78e47a5..24a509f559ee 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -551,6 +551,4 @@ struct p9_fcall { int p9_errstr2errno(char *errstr, int len); int p9_error_init(void); -int p9_trans_fd_init(void); -void p9_trans_fd_exit(void); #endif /* NET_9P_H */ -- cgit v1.2.3 From 019641d1b57dff018972b23c95e898f9ff18222f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 3 Nov 2021 20:38:23 +0100 Subject: net/p9: load default transports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all transports are split into modules it may happen that no transports are registered when v9fs_get_default_trans() is called. When that is the case try to load more transports from modules. Link: https://lkml.kernel.org/r/20211103193823.111007-5-linux@weissschuh.net Signed-off-by: Thomas Weißschuh [Dominique: constify v9fs_get_trans_by_name argument as per patch1v2] Signed-off-by: Dominique Martinet --- include/net/9p/transport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 15a4e6a9dbf7..ff842f963071 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -54,7 +54,7 @@ struct p9_trans_module { void v9fs_register_trans(struct p9_trans_module *m); void v9fs_unregister_trans(struct p9_trans_module *m); -struct p9_trans_module *v9fs_get_trans_by_name(char *s); +struct p9_trans_module *v9fs_get_trans_by_name(const char *s); struct p9_trans_module *v9fs_get_default_trans(void); void v9fs_put_trans(struct p9_trans_module *m); -- cgit v1.2.3 From 1ed147e29e505de819aaa5b57919c25348f72e1f Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 25 Nov 2021 21:01:11 +0900 Subject: exfat: move super block magic number to magic.h Move exfat superblock magic number from local definition to magic.h. It is also needed by userspace programs that call fstatfs(). Acked-by: Christian Brauner Signed-off-by: Namjae Jeon --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 35687dcb1a42..8ab81ea13424 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -43,6 +43,7 @@ #define MINIX3_SUPER_MAGIC 0x4d5a /* minix v3 fs, 60 char names */ #define MSDOS_SUPER_MAGIC 0x4d44 /* MD */ +#define EXFAT_SUPER_MAGIC 0x2011BAB0 #define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */ #define NFS_SUPER_MAGIC 0x6969 #define OCFS2_SUPER_MAGIC 0x7461636f -- cgit v1.2.3 From a6b5a28eb56c3f4988f7ff5290b954ba296e309a Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Sat, 14 Nov 2020 13:43:54 -0500 Subject: nfs: Convert to new fscache volume/cookie API Change the nfs filesystem to support fscache's indexing rewrite and reenable caching in nfs. The following changes have been made: (1) The fscache_netfs struct is no more, and there's no need to register the filesystem as a whole. (2) The session cookie is now an fscache_volume cookie, allocated with fscache_acquire_volume(). That takes three parameters: a string representing the "volume" in the index, a string naming the cache to use (or NULL) and a u64 that conveys coherency metadata for the volume. For nfs, I've made it render the volume name string as: "nfs,,,
,,,*<,param>[,]" (3) The fscache_cookie_def is no more and needed information is passed directly to fscache_acquire_cookie(). The cache no longer calls back into the filesystem, but rather metadata changes are indicated at other times. fscache_acquire_cookie() is passed the same keying and coherency information as before. (4) fscache_enable/disable_cookie() have been removed. Call fscache_use_cookie() and fscache_unuse_cookie() when a file is opened or closed to prevent a cache file from being culled and to keep resources to hand that are needed to do I/O. If a file is opened for writing, we invalidate it with FSCACHE_INVAL_DIO_WRITE in lieu of doing writeback to the cache, thereby making it cease caching until all currently open files are closed. This should give the same behaviour as the uptream code. Making the cache store local modifications isn't straightforward for NFS, so that's left for future patches. (5) fscache_invalidate() now needs to be given uptodate auxiliary data and a file size. It also takes a flag to indicate if this was due to a DIO write. (6) Call nfs_fscache_invalidate() with FSCACHE_INVAL_DIO_WRITE on a file to which a DIO write is made. (7) Call fscache_note_page_release() from nfs_release_page(). (8) Use a killable wait in nfs_vm_page_mkwrite() when waiting for PG_fscache to be cleared. (9) The functions to read and write data to/from the cache are stubbed out pending a conversion to use netfslib. Changes ======= ver #3: - Added missing =n fallback for nfs_fscache_release_file()[1][2]. ver #2: - Use gfpflags_allow_blocking() rather than using flag directly. - fscache_acquire_volume() now returns errors. - Remove NFS_INO_FSCACHE as it's no longer used. - Need to unuse a cookie on file-release, not inode-clear. Signed-off-by: Dave Wysochanski Co-developed-by: David Howells Signed-off-by: David Howells Tested-by: Dave Wysochanski Acked-by: Jeff Layton cc: Trond Myklebust cc: Anna Schumaker cc: linux-nfs@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/202112100804.nksO8K4u-lkp@intel.com/ [1] Link: https://lore.kernel.org/r/202112100957.2oEDT20W-lkp@intel.com/ [2] Link: https://lore.kernel.org/r/163819668938.215744.14448852181937731615.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906979003.143852.2601189243864854724.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967182112.1823006.7791504655391213379.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021575950.640689.12069642327533368467.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/nfs_fs.h | 1 - include/linux/nfs_fs_sb.h | 9 ++------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 05f249f20f55..00835bacd236 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -275,7 +275,6 @@ struct nfs4_copy_state { #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ -#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ #define NFS_INO_FORCE_READDIR (7) /* force readdirplus */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2a9acbfe00f0..77b2dba27bbb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -120,11 +120,6 @@ struct nfs_client { * This is used to generate the mv0 callback address. */ char cl_ipaddr[48]; - -#ifdef CONFIG_NFS_FSCACHE - struct fscache_cookie *fscache; /* client index cache cookie */ -#endif - struct net *cl_net; struct list_head pending_cb_stateids; }; @@ -194,8 +189,8 @@ struct nfs_server { struct nfs_auth_info auth_info; /* parsed auth flavors */ #ifdef CONFIG_NFS_FSCACHE - struct nfs_fscache_key *fscache_key; /* unique key for superblock */ - struct fscache_cookie *fscache; /* superblock cookie */ + struct fscache_volume *fscache; /* superblock cookie */ + char *fscache_uniq; /* Uniquifier (or NULL) */ #endif u32 pnfs_blksize; /* layout_blksize attr */ -- cgit v1.2.3 From 16f2f4e679cfdaa9552574484f104014908a76c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Aug 2021 15:19:34 +0100 Subject: nfs: Implement cache I/O by accessing the cache directly Move NFS to using fscache DIO API instead of the old upstream I/O API as that has been removed. This is a stopgap solution as the intention is that at sometime in the future, the cache will move to using larger blocks and won't be able to store individual pages in order to deal with the potential for data corruption due to the backing filesystem being able insert/remove bridging blocks of zeros into its extent list[1]. NFS then reads and writes cache pages synchronously and one page at a time. The preferred change would be to use the netfs lib, but the new I/O API can be used directly. It's just that as the cache now needs to track data for itself, caching blocks may exceed page size... This code is somewhat borrowed from my "fallback I/O" patchset[2]. Changes ======= ver #3: - Restore lost =n fallback for nfs_fscache_release_page()[2]. Signed-off-by: David Howells Tested-by: Dave Wysochanski Acked-by: Jeff Layton cc: Trond Myklebust cc: Anna Schumaker cc: linux-nfs@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YO17ZNOcq+9PajfQ@mit.edu [1] Link: https://lore.kernel.org/r/202112100957.2oEDT20W-lkp@intel.com/ [2] Link: https://lore.kernel.org/r/163189108292.2509237.12615909591150927232.stgit@warthog.procyon.org.uk/ [2] Link: https://lore.kernel.org/r/163906981318.143852.17220018647843475985.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967184451.1823006.6450645559828329590.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021577632.640689.11069627070150063812.stgit@warthog.procyon.org.uk/ # v4 --- include/linux/fscache.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 7bd35f60d19a..ede50406bcb0 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -168,6 +168,7 @@ extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); extern void __fscache_resize_cookie(struct fscache_cookie *, loff_t); extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int); extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); +extern int __fscache_begin_write_operation(struct netfs_cache_resources *, struct fscache_cookie *); extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *, loff_t, size_t, loff_t, netfs_io_terminated_t, void *, @@ -499,6 +500,33 @@ int fscache_read(struct netfs_cache_resources *cres, term_func, term_func_priv); } +/** + * fscache_begin_write_operation - Begin a write operation for the netfs lib + * @cres: The cache resources for the write being performed + * @cookie: The cookie representing the cache object + * + * Begin a write operation on behalf of the netfs helper library. @cres + * indicates the cache resources to which the operation state should be + * attached; @cookie indicates the cache object that will be accessed. + * + * @cres->inval_counter is set from @cookie->inval_counter for comparison at + * the end of the operation. This allows invalidation during the operation to + * be detected by the caller. + * + * Returns: + * * 0 - Success + * * -ENOBUFS - No caching available + * * Other error code from the cache, such as -ENOMEM. + */ +static inline +int fscache_begin_write_operation(struct netfs_cache_resources *cres, + struct fscache_cookie *cookie) +{ + if (fscache_cookie_enabled(cookie)) + return __fscache_begin_write_operation(cres, cookie); + return -ENOBUFS; +} + /** * fscache_write - Start a write to the cache. * @cres: The cache resources to use -- cgit v1.2.3 From dc6c6fb3d639756a532bcc47d4a9bf9f3965881b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Jan 2022 13:26:51 -0500 Subject: SUNRPC: Fix sockaddr handling in the svc_xprt_create_error trace point While testing, I got an unexpected KASAN splat: Jan 08 13:50:27 oracle-102.nfsv4.dev kernel: BUG: KASAN: stack-out-of-bounds in trace_event_raw_event_svc_xprt_create_err+0x190/0x210 [sunrpc] Jan 08 13:50:27 oracle-102.nfsv4.dev kernel: Read of size 28 at addr ffffc9000008f728 by task mount.nfs/4628 The memcpy() in the TP_fast_assign section of this trace point copies the size of the destination buffer in order that the buffer won't be overrun. In other similar trace points, the source buffer for this memcpy is a "struct sockaddr_storage" so the actual length of the source buffer is always long enough to prevent the memcpy from reading uninitialized or unallocated memory. However, for this trace point, the source buffer can be as small as a "struct sockaddr_in". For AF_INET sockaddrs, the memcpy() reads memory that follows the source buffer, which is not always valid memory. To avoid copying past the end of the passed-in sockaddr, make the source address's length available to the memcpy(). It would be a little nicer if the tracing infrastructure was more friendly about storing socket addresses that are not AF_INET, but I could not find a way to make printk("%pIS") work with a dynamic array. Reported-by: KASAN Fixes: 4b8f380e46e4 ("SUNRPC: Tracepoint to record errors in svc_xpo_create()") Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 684cc0e322fa..1c0a288f6a5c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1744,10 +1744,11 @@ TRACE_EVENT(svc_xprt_create_err, const char *program, const char *protocol, struct sockaddr *sap, + size_t salen, const struct svc_xprt *xprt ), - TP_ARGS(program, protocol, sap, xprt), + TP_ARGS(program, protocol, sap, salen, xprt), TP_STRUCT__entry( __field(long, error) @@ -1760,7 +1761,7 @@ TRACE_EVENT(svc_xprt_create_err, __entry->error = PTR_ERR(xprt); __assign_str(program, program); __assign_str(protocol, protocol); - memcpy(__entry->addr, sap, sizeof(__entry->addr)); + memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr))); ), TP_printk("addr=%pISpc program=%s protocol=%s error=%ld", -- cgit v1.2.3 From 16720861675393a35974532b3c837d9fd7bfe08c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Jan 2022 16:59:54 -0500 Subject: SUNRPC: Fix sockaddr handling in svcsock_accept_class trace points Avoid potentially hazardous memory copying and the needless use of "%pIS" -- in the kernel, an RPC service listener is always bound to ANYADDR. Having the network namespace is helpful when recording errors, though. Fixes: a0469f46faab ("SUNRPC: Replace dprintk call sites in TCP state change callouts") Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 1c0a288f6a5c..1e566ac4b812 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2125,17 +2125,17 @@ DECLARE_EVENT_CLASS(svcsock_accept_class, TP_STRUCT__entry( __field(long, status) __string(service, service) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __field(unsigned int, netns_ino) ), TP_fast_assign( __entry->status = status; __assign_str(service, service); - memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr)); + __entry->netns_ino = xprt->xpt_net->ns.inum; ), - TP_printk("listener=%pISpc service=%s status=%ld", - __entry->addr, __get_str(service), __entry->status + TP_printk("addr=listener service=%s status=%ld", + __get_str(service), __entry->status ) ); -- cgit v1.2.3 From bbc605cdb1e15aafaec899fedc385dc75dddac0e Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 13 Dec 2021 14:56:18 +0100 Subject: ext4: implement support for get/set fs label Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for online reading and setting of file system label. ext4_ioctl_getlabel() is simple, just get the label from the primary superblock. This might not be the first sb on the file system if 'sb=' mount option is used. In ext4_ioctl_setlabel() we update what ext4 currently views as a primary superblock and then proceed to update backup superblocks. There are two caveats: - the primary superblock might not be the first superblock and so it might not be the one used by userspace tools if read directly off the disk. - because the primary superblock might not be the first superblock we potentialy have to update it as part of backup superblock update. However the first sb location is a bit more complicated than the rest so we have to account for that. The superblock modification is created generic enough so the infrastructure can be used for other potential superblock modification operations, such as chaning UUID. Tested with generic/492 with various configurations. I also checked the behavior with 'sb=' mount options, including very large file systems with and without sparse_super/sparse_super2. Signed-off-by: Lukas Czerner Link: https://lore.kernel.org/r/20211213135618.43303-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 0ea36b2b0662..19e957b7f941 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2837,6 +2837,29 @@ TRACE_EVENT(ext4_fc_track_range, __entry->end) ); +TRACE_EVENT(ext4_update_sb, + TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk, + unsigned int flags), + + TP_ARGS(sb, fsblk, flags), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ext4_fsblk_t, fsblk) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->fsblk = fsblk; + __entry->flags = flags; + ), + + TP_printk("dev %d,%d fsblk %llu flags %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->fsblk, __entry->flags) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.2.3 From a59466ee91aaa9d43889a4c51e01de087d188448 Mon Sep 17 00:00:00 2001 From: Karolina Drobnik Date: Tue, 11 Jan 2022 10:28:47 +0000 Subject: memblock: Remove #ifdef __KERNEL__ from memblock.h memblock.h is not a uAPI header, so __KERNEL__ guard can be deleted Signed-off-by: Karolina Drobnik Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/20220111102847.673746-1-karolinadrobnik@gmail.com --- include/linux/memblock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9dc7cb239d21..50ad19662a32 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _LINUX_MEMBLOCK_H #define _LINUX_MEMBLOCK_H -#ifdef __KERNEL__ /* * Logical memory blocks. @@ -605,6 +604,5 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) } #endif -#endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 16f035d9e264d95d61d5f4056bb00d8169a7a3d1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 7 Jan 2022 01:13:02 +0100 Subject: sections: Fix __is_kernel() to include init ranges With CONFIG_KALLSYMS_ALL=y, the function is_ksym_addr() is used to determine if a symbol is from inside the kernel range. For that the given symbol address is checked if it's inside the _stext to _end range. Although this is correct, some architectures (e.g. parisc) may have the init area before the _stext address and as such the check in is_ksym_addr() fails. By extending the range check to include the init section, __is_kernel() will now detect symbols in this range as well. This fixes an issue on parisc where addresses of kernel functions in init sections aren't resolved to their symbol names. Signed-off-by: Helge Deller --- include/asm-generic/sections.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 1dfadb2e878d..00566b1fd699 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -193,12 +193,16 @@ static inline bool __is_kernel_text(unsigned long addr) * @addr: address to check * * Returns: true if the address is located in the kernel range, false otherwise. - * Note: an internal helper, only check the range of _stext to _end. + * Note: an internal helper, check the range of _stext to _end, + * and range from __init_begin to __init_end, which can be outside + * of the _stext to _end range. */ static inline bool __is_kernel(unsigned long addr) { - return addr >= (unsigned long)_stext && - addr < (unsigned long)_end; + return ((addr >= (unsigned long)_stext && + addr < (unsigned long)_end) || + (addr >= (unsigned long)__init_begin && + addr < (unsigned long)__init_end)); } #endif /* _ASM_GENERIC_SECTIONS_H_ */ -- cgit v1.2.3 From 500b55b05d0a21c4adddf4c3b29ee6f32b502046 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 21 Dec 2021 10:45:07 -0600 Subject: PCI: Work around Intel I210 ROM BAR overlap defect Per PCIe r5, sec 7.5.1.2.4, a device must not claim accesses to its Expansion ROM unless both the Memory Space Enable and the Expansion ROM Enable bit are set. But apparently some Intel I210 NICs don't work correctly if the ROM BAR overlaps another BAR, even if the Expansion ROM is disabled. Michael reported that on a Kontron SMARC-sAL28 ARM64 system with U-Boot v2021.01-rc3, the ROM BAR overlaps BAR 3, and networking doesn't work at all: BAR 0: 0x40000000 (32-bit, non-prefetchable) [size=1M] BAR 3: 0x40200000 (32-bit, non-prefetchable) [size=16K] ROM: 0x40200000 (disabled) [size=1M] NETDEV WATCHDOG: enP2p1s0 (igb): transmit queue 0 timed out Hardware name: Kontron SMARC-sAL28 (Single PHY) on SMARC Eval 2.0 carrier (DT) igb 0002:01:00.0 enP2p1s0: Reset adapter Previously, pci_std_update_resource() wrote the assigned ROM address to the BAR only when the ROM was enabled. This meant that the I210 ROM BAR could be left with an address assigned by firmware, which might overlap with other BARs. Quirk these I210 devices so pci_std_update_resource() always writes the assigned address to the ROM BAR, whether or not the ROM is enabled. Link: https://lore.kernel.org/r/20211223163754.GA1267351@bhelgaas Link: https://lore.kernel.org/r/20201230185317.30915-1-michael@walle.cc Link: https://bugzilla.kernel.org/show_bug.cgi?id=211105 Reported-by: Michael Walle Tested-by: Michael Walle Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 18a75c8e615c..51c4a063f489 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -455,6 +455,7 @@ struct pci_dev { unsigned int link_active_reporting:1;/* Device capable of reporting link active */ unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */ unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ + unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ -- cgit v1.2.3 From 1d3cfc2835c1754d19a743dc346a9e58cf0c07c0 Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Thu, 23 Dec 2021 17:23:33 -0800 Subject: ntb_hw_switchtec: Remove code for disabling ID protection ID protection is a firmware setting for NT window access control. With it enabled, only the posted requests with requester IDs in the requester ID table will be allowed to access the NT windows. Otherwise all posted requests are allowed. Normally user will configure it statically via the Switchtec config file, and it will take effect when the firmware boots up. The driver can also toggle the ID protection setting dynamically, which will overwrite the static setting in the Switchtec config file as a side effect. Currently, the driver disables the ID protection. However, it's not necessary to disable the ID protection at the driver level as the driver has already configured the proper requester IDs in the requester ID table to allow the corresponding posted requests to hit the NT windows. Remove the code that disables the ID protection to make the static setting prevail. Note: ID protection is not applicable to non-posted requests. Signed-off-by: Kelvin Cao Signed-off-by: Jon Mason --- include/linux/switchtec.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index be24056ac00f..48fabe36509e 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -337,8 +337,6 @@ enum { NTB_CTRL_REQ_ID_EN = 1 << 0, NTB_CTRL_LUT_EN = 1 << 0, - - NTB_PART_CTRL_ID_PROT_DIS = 1 << 0, }; struct ntb_ctrl_regs { -- cgit v1.2.3 From 9b7a4de9f126d8c8d59052088213990159417d5b Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 16 Dec 2021 10:45:03 +0100 Subject: drm/amdkfd: make SPDX License expression more sound Commit b5f57384805a ("drm/amdkfd: Add sysfs bitfields and enums to uAPI") adds include/uapi/linux/kfd_sysfs.h with the "GPL-2.0 OR MIT WITH Linux-syscall-note" SPDX-License expression. The command ./scripts/spdxcheck.py warns: include/uapi/linux/kfd_sysfs.h: 1:48 Exception not valid for license MIT: Linux-syscall-note For a uapi header, the file under GPLv2 License must be combined with the Linux-syscall-note, but combining the MIT License with the Linux-syscall-note makes no sense, as the note provides an exception for GPL-licensed code, not for permissively licensed code. So, reorganize the SPDX expression to only combine the note with the GPL License condition. This makes spdxcheck happy again. Fixes: b5f57384805a ("drm/amdkfd: Add sysfs bitfields and enums to uAPI") Signed-off-by: Lukas Bulwahn Reviewed-by: kstewart@linuxfoundation.org Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_sysfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h index e1fb78b4bf09..3e330f368917 100644 --- a/include/uapi/linux/kfd_sysfs.h +++ b/include/uapi/linux/kfd_sysfs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT WITH Linux-syscall-note */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ /* * Copyright 2021 Advanced Micro Devices, Inc. * -- cgit v1.2.3 From e6435f1e02f410e3507f02a37c0fbb17971ddc7c Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 6 Dec 2021 15:54:04 +0000 Subject: fscache: Add a tracepoint for cookie use/unuse Add a tracepoint to track fscache_use/unuse_cookie(). Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164021588628.640689.12942919367404043608.stgit@warthog.procyon.org.uk/ # v4 --- include/trace/events/fscache.h | 44 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include') diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 1594aefadeac..cb3fb337e880 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -71,6 +71,12 @@ enum fscache_cookie_trace { fscache_cookie_see_work, }; +enum fscache_active_trace { + fscache_active_use, + fscache_active_use_modify, + fscache_active_unuse, +}; + enum fscache_access_trace { fscache_access_acquire_volume, fscache_access_acquire_volume_end, @@ -146,6 +152,11 @@ enum fscache_access_trace { EM(fscache_cookie_see_withdraw, "- x-wth") \ E_(fscache_cookie_see_work, "- work ") +#define fscache_active_traces \ + EM(fscache_active_use, "USE ") \ + EM(fscache_active_use_modify, "USE-m ") \ + E_(fscache_active_unuse, "UNUSE ") + #define fscache_access_traces \ EM(fscache_access_acquire_volume, "BEGIN acq_vol") \ EM(fscache_access_acquire_volume_end, "END acq_vol") \ @@ -264,6 +275,39 @@ TRACE_EVENT(fscache_cookie, __entry->ref) ); +TRACE_EVENT(fscache_active, + TP_PROTO(unsigned int cookie_debug_id, + int ref, + int n_active, + int n_accesses, + enum fscache_active_trace why), + + TP_ARGS(cookie_debug_id, ref, n_active, n_accesses, why), + + TP_STRUCT__entry( + __field(unsigned int, cookie ) + __field(int, ref ) + __field(int, n_active ) + __field(int, n_accesses ) + __field(enum fscache_active_trace, why ) + ), + + TP_fast_assign( + __entry->cookie = cookie_debug_id; + __entry->ref = ref; + __entry->n_active = n_active; + __entry->n_accesses = n_accesses; + __entry->why = why; + ), + + TP_printk("c=%08x %s r=%d a=%d c=%d", + __entry->cookie, + __print_symbolic(__entry->why, fscache_active_traces), + __entry->ref, + __entry->n_accesses, + __entry->n_active) + ); + TRACE_EVENT(fscache_access_cache, TP_PROTO(unsigned int cache_debug_id, int ref, -- cgit v1.2.3 From b1ae6dc41eaaa98bb75671e0f3665bfda248c3e7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 5 Jan 2022 13:55:12 -0800 Subject: module: add in-kernel support for decompressing Current scheme of having userspace decompress kernel modules before loading them into the kernel runs afoul of LoadPin security policy, as it loses link between the source of kernel module on the disk and binary blob that is being loaded into the kernel. To solve this issue let's implement decompression in kernel, so that we can pass a file descriptor of compressed module file into finit_module() which will keep LoadPin happy. To let userspace know what compression/decompression scheme kernel supports it will create /sys/module/compression attribute. kmod can read this attribute and decide if it can pass compressed file to finit_module(). New MODULE_INIT_COMPRESSED_DATA flag indicates that the kernel should attempt to decompress the data read from file descriptor prior to trying load the module. To simplify things kernel will only implement single decompression method matching compression method selected when generating modules. This patch implements gzip and xz; more can be added later, Signed-off-by: Dmitry Torokhov Signed-off-by: Luis Chamberlain --- include/uapi/linux/module.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h index 50d98ec5e866..03a33ffffcba 100644 --- a/include/uapi/linux/module.h +++ b/include/uapi/linux/module.h @@ -5,5 +5,6 @@ /* Flags for sys_finit_module: */ #define MODULE_INIT_IGNORE_MODVERSIONS 1 #define MODULE_INIT_IGNORE_VERMAGIC 2 +#define MODULE_INIT_COMPRESSED_FILE 4 #endif /* _UAPI_LINUX_MODULE_H */ -- cgit v1.2.3 From ca321ec74322e3c49552fc1ffc80b42d0dbf1a84 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 8 Jan 2022 15:06:57 +0100 Subject: module.h: allow #define strings to work with MODULE_IMPORT_NS The MODULE_IMPORT_NS() macro does not allow defined strings to work properly with it, so add a layer of indirection to allow this to happen. Cc: Luis Chamberlain Cc: Jessica Yu Cc: Matthias Maennich Signed-off-by: Greg Kroah-Hartman Reviewed-by: Matthias Maennich Signed-off-by: Luis Chamberlain --- include/linux/module.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index c9f1200b2312..f4338235ed2c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -290,7 +290,8 @@ extern typeof(name) __mod_##type##__##name##_device_table \ * files require multiple MODULE_FIRMWARE() specifiers */ #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) -#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) +#define _MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) +#define MODULE_IMPORT_NS(ns) _MODULE_IMPORT_NS(ns) struct notifier_block; -- cgit v1.2.3 From cb963a19d99fc42d9abf4238968ef85fcc2ef3e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Jan 2022 01:47:50 -0800 Subject: net: sched: do not allocate a tracker in tcf_exts_init() While struct tcf_exts has a net pointer, it is not refcounted until tcf_exts_get_net() is called. Fixes: dbdcda634ce3 ("net: sched: add netns refcount tracker to struct tcf_exts") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20220110094750.236478-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/pkt_cls.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ebef45e821af..676cb8ea9e15 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -218,8 +218,10 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net, #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + /* Note: we do not own yet a reference on net. + * This reference might be taken later from tcf_exts_get_net(). + */ exts->net = net; - netns_tracker_alloc(net, &exts->ns_tracker, GFP_KERNEL); exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) -- cgit v1.2.3 From 869b6ca39c08c5b10eeb29d4b3c4bc433bf8ba5e Mon Sep 17 00:00:00 2001 From: Huang Yiwei Date: Mon, 22 Nov 2021 13:05:09 +0800 Subject: dt-bindings: mailbox: Add more protocol and client ID Add more protocol and client ID which can be used in device tree properties. Signed-off-by: Huang Yiwei Signed-off-by: Jassi Brar --- include/dt-bindings/mailbox/qcom-ipcc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/mailbox/qcom-ipcc.h b/include/dt-bindings/mailbox/qcom-ipcc.h index eb91a6c05b71..9296d0bb5f34 100644 --- a/include/dt-bindings/mailbox/qcom-ipcc.h +++ b/include/dt-bindings/mailbox/qcom-ipcc.h @@ -8,6 +8,7 @@ /* Signal IDs for MPROC protocol */ #define IPCC_MPROC_SIGNAL_GLINK_QMP 0 +#define IPCC_MPROC_SIGNAL_TZ 1 #define IPCC_MPROC_SIGNAL_SMP2P 2 #define IPCC_MPROC_SIGNAL_PING 3 @@ -29,6 +30,7 @@ #define IPCC_CLIENT_PCIE1 14 #define IPCC_CLIENT_PCIE2 15 #define IPCC_CLIENT_SPSS 16 +#define IPCC_CLIENT_TME 23 #define IPCC_CLIENT_WPSS 24 #endif -- cgit v1.2.3 From 831c1ae725f7d2f8f858b0840692b48e75b49331 Mon Sep 17 00:00:00 2001 From: Sunil Muthuswamy Date: Wed, 5 Jan 2022 11:32:35 -0800 Subject: PCI: hv: Make the code arch neutral by adding arch specific interfaces Encapsulate arch dependencies in Hyper-V vPCI through a set of arch-dependent interfaces. Adding these arch specific interfaces will allow for an implementation for other architectures, such as arm64. There are no functional changes expected from this patch. Link: https://lore.kernel.org/r/1641411156-31705-2-git-send-email-sunilmut@linux.microsoft.com Signed-off-by: Sunil Muthuswamy Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Boqun Feng Reviewed-by: Marc Zyngier Reviewed-by: Michael Kelley --- include/asm-generic/hyperv-tlfs.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 8ed6733d5146..8f97c2927bee 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -540,39 +540,6 @@ enum hv_interrupt_source { HV_INTERRUPT_SOURCE_IOAPIC, }; -union hv_msi_address_register { - u32 as_uint32; - struct { - u32 reserved1:2; - u32 destination_mode:1; - u32 redirection_hint:1; - u32 reserved2:8; - u32 destination_id:8; - u32 msi_base:12; - }; -} __packed; - -union hv_msi_data_register { - u32 as_uint32; - struct { - u32 vector:8; - u32 delivery_mode:3; - u32 reserved1:3; - u32 level_assert:1; - u32 trigger_mode:1; - u32 reserved2:16; - }; -} __packed; - -/* HvRetargetDeviceInterrupt hypercall */ -union hv_msi_entry { - u64 as_uint64; - struct { - union hv_msi_address_register address; - union hv_msi_data_register data; - } __packed; -}; - union hv_ioapic_rte { u64 as_uint64; -- cgit v1.2.3 From 3f4b32511a77bc5a05cfbf26fec94c4e1b1cf46a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:18 +0000 Subject: PM: core: Remove DEFINE_UNIVERSAL_DEV_PM_OPS() macro The deprecated UNIVERSAL_DEV_PM_OPS() macro uses the provided callbacks for both runtime PM and system sleep, which is very likely to be a mistake, as a system sleep can be triggered while a given device is already PM-suspended, which would cause the suspend callback to be called twice. The amount of users of UNIVERSAL_DEV_PM_OPS() is also tiny (16 occurences) compared to the number of places where SET_SYSTEM_SLEEP_PM_OPS() is used with pm_runtime_force_suspend() and pm_runtime_force_resume(), which makes me think that none of these cases are actually valid. As the new macro DEFINE_UNIVERSAL_DEV_PM_OPS() which was introduced to replace UNIVERSAL_DEV_PM_OPS() is currently unused, remove it before someone starts to use it in yet another invalid case. Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index e1e9402180b9..02f059d814bb 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -366,6 +366,12 @@ static const struct dev_pm_ops name = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } +/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ +#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +const struct dev_pm_ops __maybe_unused name = { \ + SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +} + /* * Use this for defining a set of PM operations to be used in all situations * (system suspend, hibernation or runtime PM). @@ -378,20 +384,9 @@ static const struct dev_pm_ops name = { \ * suspend and "early" resume callback pointers, .suspend_late() and * .resume_early(), to the same routines as .runtime_suspend() and * .runtime_resume(), respectively (and analogously for hibernation). + * + * Deprecated. You most likely don't want this macro. */ -#define DEFINE_UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ -static const struct dev_pm_ops name = { \ - SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ -} - -/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ -#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -const struct dev_pm_ops __maybe_unused name = { \ - SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ -} - -/* Deprecated. Use DEFINE_UNIVERSAL_DEV_PM_OPS() instead. */ #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ const struct dev_pm_ops __maybe_unused name = { \ SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ -- cgit v1.2.3 From 52cc1d7f9786d2be44a3ab9b5b48416a7618e713 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:19 +0000 Subject: PM: core: Remove static qualifier in DEFINE_SIMPLE_DEV_PM_OPS macro Keep this macro in line with the other ones. This makes it possible to use them in the cases where the underlying dev_pm_ops structure is exported. Restore the "static" qualifier in the two drivers where the DEFINE_SIMPLE_DEV_PM_OPS macro was used. Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 02f059d814bb..8e13387e70ec 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -362,7 +362,7 @@ struct dev_pm_ops { * to RAM and hibernation. */ #define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -static const struct dev_pm_ops name = { \ +const struct dev_pm_ops name = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } -- cgit v1.2.3 From 0ae101fdd3297b7165755340e05386f1e1379709 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:20 +0000 Subject: PM: core: Add EXPORT[_GPL]_SIMPLE_DEV_PM_OPS macros These macros are defined conditionally, according to CONFIG_PM: - if CONFIG_PM is enabled, these macros resolve to DEFINE_SIMPLE_DEV_PM_OPS(), and the dev_pm_ops symbol will be exported. - if CONFIG_PM is disabled, these macros will result in a dummy static dev_pm_ops to be created with the __maybe_unused flag. The dev_pm_ops will then be discarded by the compiler, along with the provided callback functions if they are not used anywhere else. In the second case, the symbol is not exported, which should be perfectly fine - users of the symbol should all use the pm_ptr() or pm_sleep_ptr() macro, so the dev_pm_ops marked as "extern" in the client's code will never be accessed. Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 8e13387e70ec..8279af2c538a 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -8,6 +8,7 @@ #ifndef _LINUX_PM_H #define _LINUX_PM_H +#include #include #include #include @@ -357,14 +358,42 @@ struct dev_pm_ops { #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) #endif +#define _DEFINE_DEV_PM_OPS(name, \ + suspend_fn, resume_fn, \ + runtime_suspend_fn, runtime_resume_fn, idle_fn) \ +const struct dev_pm_ops name = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \ +} + +#ifdef CONFIG_PM +#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ + runtime_resume_fn, idle_fn, sec) \ + _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ + runtime_resume_fn, idle_fn); \ + _EXPORT_SYMBOL(name, sec) +#else +#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ + runtime_resume_fn, idle_fn, sec) \ +static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \ + resume_fn, runtime_suspend_fn, \ + runtime_resume_fn, idle_fn) +#endif + /* * Use this if you want to use the same suspend and resume callbacks for suspend * to RAM and hibernation. + * + * If the underlying dev_pm_ops struct symbol has to be exported, use + * EXPORT_SIMPLE_DEV_PM_OPS() or EXPORT_GPL_SIMPLE_DEV_PM_OPS() instead. */ #define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -const struct dev_pm_ops name = { \ - SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ -} + _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) + +#define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "") +#define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ + _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl") /* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -- cgit v1.2.3 From 9d8619190031af0a314bee865262d8975473e4dd Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:21 +0000 Subject: PM: runtime: Add DEFINE_RUNTIME_DEV_PM_OPS() macro A lot of drivers create a dev_pm_ops struct with the system sleep suspend/resume callbacks set to pm_runtime_force_suspend() and pm_runtime_force_resume(). These drivers can now use the DEFINE_RUNTIME_DEV_PM_OPS() macro, which will use pm_runtime_force_{suspend,resume}() as the system sleep callbacks, while having the same dead code removal characteristic that is already provided by DEFINE_SIMPLE_DEV_PM_OPS(). Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 3 ++- include/linux/pm_runtime.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 8279af2c538a..f7d2be686359 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -414,7 +414,8 @@ const struct dev_pm_ops __maybe_unused name = { \ * .resume_early(), to the same routines as .runtime_suspend() and * .runtime_resume(), respectively (and analogously for hibernation). * - * Deprecated. You most likely don't want this macro. + * Deprecated. You most likely don't want this macro. Use + * DEFINE_RUNTIME_DEV_PM_OPS() instead. */ #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ const struct dev_pm_ops __maybe_unused name = { \ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 016de5776b6d..4af454d29281 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -22,6 +22,20 @@ usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ +/* + * Use this for defining a set of PM operations to be used in all situations + * (system suspend, hibernation or runtime PM). + * + * Note that the behaviour differs from the deprecated UNIVERSAL_DEV_PM_OPS() + * macro, which uses the provided callbacks for both runtime PM and system + * sleep, while DEFINE_RUNTIME_DEV_PM_OPS() uses pm_runtime_force_suspend() + * and pm_runtime_force_resume() for its system sleep callbacks. + */ +#define DEFINE_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ + _DEFINE_DEV_PM_OPS(name, pm_runtime_force_suspend, \ + pm_runtime_force_resume, suspend_fn, \ + resume_fn, idle_fn) + #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; -- cgit v1.2.3 From d59ff7d9d84b03d22c5107f794e28fc8e1fce3a6 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 7 Jan 2022 18:17:22 +0000 Subject: PM: runtime: Add EXPORT[_GPL]_RUNTIME_DEV_PM_OPS macros Similar to EXPORT[_GPL]_SIMPLE_DEV_PM_OPS, but for users with runtime-PM suspend/resume callbacks. Signed-off-by: Paul Cercueil Acked-by: Jonathan Cameron Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 4af454d29281..9f09601c465a 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -30,12 +30,22 @@ * macro, which uses the provided callbacks for both runtime PM and system * sleep, while DEFINE_RUNTIME_DEV_PM_OPS() uses pm_runtime_force_suspend() * and pm_runtime_force_resume() for its system sleep callbacks. + * + * If the underlying dev_pm_ops struct symbol has to be exported, use + * EXPORT_RUNTIME_DEV_PM_OPS() or EXPORT_GPL_RUNTIME_DEV_PM_OPS() instead. */ #define DEFINE_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ _DEFINE_DEV_PM_OPS(name, pm_runtime_force_suspend, \ pm_runtime_force_resume, suspend_fn, \ resume_fn, idle_fn) +#define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ + _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ + suspend_fn, resume_fn, idle_fn, "") +#define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ + _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ + suspend_fn, resume_fn, idle_fn, "_gpl") + #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; -- cgit v1.2.3 From 2d7c86a8f9cdce1408c4f3c69d94d007eff2f179 Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Wed, 14 Jul 2021 15:35:50 +0530 Subject: libceph: generalize addr/ip parsing based on delimiter ... and remove hardcoded function name in ceph_parse_ips(). [ idryomov: delim parameter, drop CEPH_ADDR_PARSE_DEFAULT_DELIM ] Signed-off-by: Venky Shankar Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 2 +- include/linux/ceph/messenger.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 409d8c29bc4f..c72285d8594e 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -301,7 +301,7 @@ struct fs_parameter; struct fc_log; struct ceph_options *ceph_alloc_options(void); int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, - struct fc_log *l); + struct fc_log *l, char delim); int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, struct fc_log *l); int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 0e6e9ad3c3bf..ff99ce094cfa 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -532,7 +532,7 @@ extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); extern int ceph_parse_ips(const char *c, const char *end, struct ceph_entity_addr *addr, - int max_count, int *count); + int max_count, int *count, char delim); extern int ceph_msgr_init(void); extern void ceph_msgr_exit(void); -- cgit v1.2.3 From 4153c7fc937a2afa077dbdb9fe3189b9981f423c Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Wed, 14 Jul 2021 15:35:51 +0530 Subject: libceph: rename parse_fsid() to ceph_parse_fsid() and export ... as it is too generic. also, use __func__ when logging rather than hardcoding the function name. Signed-off-by: Venky Shankar Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c72285d8594e..644f224eccf7 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -296,6 +296,7 @@ extern bool libceph_compatible(void *data); extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern void *ceph_kvmalloc(size_t size, gfp_t flags); +extern int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid); struct fs_parameter; struct fc_log; -- cgit v1.2.3 From a0b3a15eab6bc2e90008460b646d53e7d9dcdbbb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 10 Jan 2022 18:28:33 -0500 Subject: ceph: move CEPH_SUPER_MAGIC definition to magic.h The uapi headers are missing the ceph definition. Move it there so userland apps can ID cephfs. Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 35687dcb1a42..53a3c20394cf 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -6,6 +6,7 @@ #define AFFS_SUPER_MAGIC 0xadff #define AFS_SUPER_MAGIC 0x5346414F #define AUTOFS_SUPER_MAGIC 0x0187 +#define CEPH_SUPER_MAGIC 0x00c36400 #define CODA_SUPER_MAGIC 0x73757245 #define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ #define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ -- cgit v1.2.3 From 91341fa0003befd097e190ec2a4bf63ad957c49a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Jan 2022 01:22:29 -0800 Subject: inet: frags: annotate races around fqdir->dead and fqdir->high_thresh Both fields can be read/written without synchronization, add proper accessors and documentation. Fixes: d5dd88794a13 ("inet: fix various use-after-free in defrags units") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_frag.h | 11 +++++++++-- include/net/ipv6_frag.h | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 48cc5795ceda..63540be0fc34 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { - fqdir->high_thresh = 0; /* prevent creation of new frags */ - fqdir->dead = true; + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). + */ + WRITE_ONCE(fqdir->dead, true); } void fqdir_exit(struct fqdir *fqdir); diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 851029ecff13..0a4779175a52 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) struct sk_buff *head; rcu_read_lock(); - if (fq->q.fqdir->dead) + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(fq->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&fq->q.lock); -- cgit v1.2.3 From fb80445c438c78b40b547d12b8d56596ce4ccfeb Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Wed, 12 Jan 2022 19:02:10 +0200 Subject: net_sched: restore "mpu xxx" handling commit 56b765b79e9a ("htb: improved accuracy at high rates") broke "overhead X", "linklayer atm" and "mpu X" attributes. "overhead X" and "linklayer atm" have already been fixed. This restores the "mpu X" handling, as might be used by DOCSIS or Ethernet shaping: tc class add ... htb rate X overhead 4 mpu 64 The code being fixed is used by htb, tbf and act_police. Cake has its own mpu handling. qdisc_calculate_pkt_len still uses the size table containing values adjusted for mpu by user space. iproute2 tc has always passed mpu into the kernel via a tc_ratespec structure, but the kernel never directly acted on it, merely stored it so that it could be read back by `tc class show`. Rather, tc would generate length-to-time tables that included the mpu (and linklayer) in their construction, and the kernel used those tables. Since v3.7, the tables were no longer used. Along with "mpu", this also broke "overhead" and "linklayer" which were fixed in 01cb71d2d47b ("net_sched: restore "overhead xxx" handling", v3.10) and 8a8e3d84b171 ("net_sched: restore "linklayer atm" handling", v3.11). "overhead" was fixed by simply restoring use of tc_ratespec::overhead - this had originally been used by the kernel but was initially omitted from the new non-table-based calculations. "linklayer" had been handled in the table like "mpu", but the mode was not originally passed in tc_ratespec. The new implementation was made to handle it by getting new versions of tc to pass the mode in an extended tc_ratespec, and for older versions of tc the table contents were analysed at load time to deduce linklayer. As "mpu" has always been given to the kernel in tc_ratespec, accompanying the mpu-based table, we can restore system functionality with no userspace change by making the kernel act on the tc_ratespec value. Fixes: 56b765b79e9a ("htb: improved accuracy at high rates") Signed-off-by: Kevin Bracey Cc: Eric Dumazet Cc: Jiri Pirko Cc: Vimalkumar Link: https://lore.kernel.org/r/20220112170210.1014351-1-kevin@bracey.fi Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c11dbac5abb2..472843eedbae 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1244,6 +1244,7 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u16 mpu; u8 linklayer; u8 shift; }; @@ -1253,6 +1254,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, { len += r->overhead; + if (len < r->mpu) + len = r->mpu; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; @@ -1275,6 +1279,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; + res->mpu = r->mpu; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } -- cgit v1.2.3 From 180dccb0dba4f5e84a4a70c1be1d34cbb6528b32 Mon Sep 17 00:00:00 2001 From: Laibin Qiu Date: Thu, 13 Jan 2022 10:55:36 +0800 Subject: blk-mq: fix tag_get wait task can't be awakened In case of shared tags, there might be more than one hctx which allocates from the same tags, and each hctx is limited to allocate at most: hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U); tag idle detection is lazy, and may be delayed for 30sec, so there could be just one real active hctx(queue) but all others are actually idle and still accounted as active because of the lazy idle detection. Then if wake_batch is > hctx_max_depth, driver tag allocation may wait forever on this real active hctx. Fix this by recalculating wake_batch when inc or dec active_queues. Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps") Suggested-by: Ming Lei Suggested-by: John Garry Signed-off-by: Laibin Qiu Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220113025536.1479653-1-qiulaibin@huawei.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index fc0357a6e19b..95df357ec009 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) sbitmap_free(&sbq->sb); } +/** + * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch + * @sbq: Bitmap queue to recalculate wake batch. + * @users: Number of shares. + * + * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch + * by depth. This interface is for HCTX shared tags or queue shared tags. + */ +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, + unsigned int users); + /** * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. * @sbq: Bitmap queue to resize. -- cgit v1.2.3 From 289e7b0f7eb47b87a0441e6c81336316f301eb39 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 13 Dec 2021 11:08:53 +0100 Subject: tracing: Account bottom half disabled sections. Disabling only bottom halves via local_bh_disable() disables also preemption but this remains invisible to tracing. On a CONFIG_PREEMPT kernel one might wonder why there is no scheduling happening despite the N flag in the trace. The reason might be the a rcu_read_lock_bh() section. Add a 'b' to the tracing output if in task context with disabled bottom halves. Link: https://lkml.kernel.org/r/YbcbtdtC/bjCKo57@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 3900404aa063..70c069aef02c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -172,6 +172,7 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, + TRACE_FLAG_BH_OFF = 0x80, }; #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT -- cgit v1.2.3 From 6840f9094f2bd788a316d8cb0a4e42538d3e47dd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 13 Jan 2022 16:44:19 -0500 Subject: pagevec: Initialise folio_batch->percpu_pvec_drained When UBSAN is enabled, it reports an invalid value in __pagevec_release() when accessing pvec->percpu_pvec_drained, which is simply whatever garbage was on the stack. Initialise it when initialising the rest of the folio_batch. Fixes: 10331795fb79 ("pagevec: Add folio_batch") Reported-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/pagevec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index dda8d5868c81..67b1246f136b 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -111,6 +111,7 @@ static_assert(offsetof(struct pagevec, pages) == static inline void folio_batch_init(struct folio_batch *fbatch) { fbatch->nr = 0; + fbatch->percpu_pvec_drained = false; } static inline unsigned int folio_batch_count(struct folio_batch *fbatch) -- cgit v1.2.3 From e5b48ee30aec1fe6dff05e36b22e886c665b4736 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 16:14:46 +0900 Subject: ata: sata_fsl: fix scsi host initialization When compiling with W=1, the sata_fsl driver compilation throws the warning: drivers/ata/sata_fsl.c:1385:22: error: initialized field overwritten [-Werror=override-init] 1385 | .can_queue = SATA_FSL_QUEUE_DEPTH, This is due to the driver scsi host template initialization overwriting the can_queue field that is already set using the ATA_NCQ_SHT() initializer macro, resulting in the same field being initialized twice in the host template declaration. To remove this warning, introduce the ATA_SUBBASE_SHT_QD() and ATA_NCQ_SHT_QD() initialization macros to allow specifying a queue depth different from the default ATA_DEF_QUEUE using an additional argument to the macro. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- include/linux/libata.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index c258f69106f4..2e5e7c40c991 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1385,6 +1385,12 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ .slave_configure = ata_scsi_slave_config +#define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \ + __ATA_BASE_SHT(drv_name), \ + .can_queue = drv_qd, \ + .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ + .slave_configure = ata_scsi_slave_config + #define ATA_BASE_SHT(drv_name) \ ATA_SUBBASE_SHT(drv_name), \ .sdev_groups = ata_common_sdev_groups @@ -1396,6 +1402,11 @@ extern const struct attribute_group *ata_ncq_sdev_groups[]; ATA_SUBBASE_SHT(drv_name), \ .sdev_groups = ata_ncq_sdev_groups, \ .change_queue_depth = ata_scsi_change_queue_depth + +#define ATA_NCQ_SHT_QD(drv_name, drv_qd) \ + ATA_SUBBASE_SHT_QD(drv_name, drv_qd), \ + .sdev_groups = ata_ncq_sdev_groups, \ + .change_queue_depth = ata_scsi_change_queue_depth #endif /* -- cgit v1.2.3 From 0561e514c944da874ccdfbe2922f71b4c333c7e1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 4 Jan 2022 17:54:18 +0900 Subject: ata: fix read_id() ata port operation interface Drivers that need to tweak a device IDENTIFY data implement the read_id() port operation. The IDENTIFY data buffer is passed as an argument to the read_id() operation for drivers to use. However, when this operation is called, the IDENTIFY data is not yet converted to CPU endian and contains le16 words. Change the interface of the read_id operation to pass a __le16 * pointer to the IDENTIFY data buffer to clarify the buffer endianness. Fix the pata_netcell, pata_it821x, ahci_xgene, ahci_ceva and ahci_brcm drivers implementation of this operation and modify the code to corretly deal with identify data words manipulation to avoid sparse warnings such as: drivers/ata/ahci_xgene.c:262:33: warning: invalid assignment: &= drivers/ata/ahci_xgene.c:262:33: left side has type unsigned short drivers/ata/ahci_xgene.c:262:33: right side has type restricted __le16 Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- include/linux/libata.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 2e5e7c40c991..bf706cd45674 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -884,7 +884,8 @@ struct ata_port_operations { void (*set_piomode)(struct ata_port *ap, struct ata_device *dev); void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev); int (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev); - unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); + unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, + __le16 *id); void (*dev_config)(struct ata_device *dev); @@ -1119,7 +1120,7 @@ extern void ata_id_string(const u16 *id, unsigned char *s, extern void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern unsigned int ata_do_dev_read_id(struct ata_device *dev, - struct ata_taskfile *tf, u16 *id); + struct ata_taskfile *tf, __le16 *id); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern u64 ata_qc_get_active(struct ata_port *ap); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd); -- cgit v1.2.3 From b9ba367c513dbc165dd6c01266a59db4be2a3564 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Wed, 5 Jan 2022 16:36:16 +0100 Subject: ata: libata: Rename link flag ATA_LFLAG_NO_DB_DELAY Rename the link flag ATA_LFLAG_NO_DB_DELAY to ATA_LFLAG_NO_DEBOUNCE_DELAY. The new name is longer, but clearer. Signed-off-by: Paul Menzel Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index bf706cd45674..605756f645be 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -143,7 +143,7 @@ enum { ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */ ATA_LFLAG_RST_ONCE = (1 << 9), /* limit recovery to one reset */ ATA_LFLAG_CHANGED = (1 << 10), /* LPM state changed on this link */ - ATA_LFLAG_NO_DB_DELAY = (1 << 11), /* no debounce delay on link resume */ + ATA_LFLAG_NO_DEBOUNCE_DELAY = (1 << 11), /* no debounce delay on link resume */ /* struct ata_port flags */ ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ -- cgit v1.2.3 From be50b2065dfa3d88428fdfdc340d154d96bf6848 Mon Sep 17 00:00:00 2001 From: Guang Zeng Date: Wed, 5 Jan 2022 04:35:29 -0800 Subject: kvm: x86: Add support for getting/setting expanded xstate buffer With KVM_CAP_XSAVE, userspace uses a hardcoded 4KB buffer to get/set xstate data from/to KVM. This doesn't work when dynamic xfeatures (e.g. AMX) are exposed to the guest as they require a larger buffer size. Introduce a new capability (KVM_CAP_XSAVE2). Userspace VMM gets the required xstate buffer size via KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2). KVM_SET_XSAVE is extended to work with both legacy and new capabilities by doing properly-sized memdup_user() based on the guest fpu container. KVM_GET_XSAVE is kept for backward-compatible reason. Instead, KVM_GET_XSAVE2 is introduced under KVM_CAP_XSAVE2 as the preferred interface for getting xstate buffer (4KB or larger size) from KVM (Link: https://lkml.org/lkml/2021/12/15/510) Also, update the api doc with the new KVM_GET_XSAVE2 ioctl. Signed-off-by: Guang Zeng Signed-off-by: Wei Wang Signed-off-by: Jing Liu Signed-off-by: Kevin Tian Signed-off-by: Yang Zhong Message-Id: <20220105123532.12586-19-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index fbfd70d965c6..9563d294f181 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1132,6 +1132,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_MTE 205 #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #define KVM_CAP_VM_GPA_BITS 207 +#define KVM_CAP_XSAVE2 208 #ifdef KVM_CAP_IRQ_ROUTING @@ -1622,6 +1623,9 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; -- cgit v1.2.3 From d9679d0013a66849f23057978f92e76b255c50aa Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 13 Oct 2021 06:55:44 -0400 Subject: virtio: wrap config->reset calls This will enable cleanups down the road. The idea is to disable cbs, then add "flush_queued_cbs" callback as a parameter, this way drivers can flush any work queued after callbacks have been disabled. Signed-off-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20211013105226.20225-1-mst@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 41edbc01ffa4..72292a62cd90 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -138,6 +138,7 @@ int virtio_finalize_features(struct virtio_device *dev); int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); #endif +void virtio_reset_device(struct virtio_device *dev); size_t virtio_max_dma_size(struct virtio_device *vdev); -- cgit v1.2.3 From 539fec78edb4e084e7c532affc56cc42d4ceea4b Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 26 Nov 2021 17:47:53 +0100 Subject: vdpa: add driver_override support `driver_override` allows to control which of the vDPA bus drivers binds to a vDPA device. If `driver_override` is not set, the previous behaviour is followed: devices use the first vDPA bus driver loaded (unless auto binding is disabled). Tested on Fedora 34 with driverctl(8): $ modprobe virtio-vdpa $ modprobe vhost-vdpa $ modprobe vdpa-sim-net $ vdpa dev add mgmtdev vdpasim_net name dev1 # dev1 is attached to the first vDPA bus driver loaded $ driverctl -b vdpa list-devices dev1 virtio_vdpa $ driverctl -b vdpa set-override dev1 vhost_vdpa $ driverctl -b vdpa list-devices dev1 vhost_vdpa [*] Note: driverctl(8) integrates with udev so the binding is preserved. Suggested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20211126164753.181829-3-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index c3011ccda430..ae34015b37b7 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -64,6 +64,7 @@ struct vdpa_mgmt_dev; * struct vdpa_device - representation of a vDPA device * @dev: underlying device * @dma_dev: the actual device that is performing DMA + * @driver_override: driver name to force a match * @config: the configuration ops for this device. * @cf_mutex: Protects get and set access to configuration layout. * @index: device index @@ -76,6 +77,7 @@ struct vdpa_mgmt_dev; struct vdpa_device { struct device dev; struct device *dma_dev; + const char *driver_override; const struct vdpa_config_ops *config; struct mutex cf_mutex; /* Protects get/set config */ unsigned int index; -- cgit v1.2.3 From 28cc408be72cebb0f3fcc37bc74ab3196d4de726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 4 Nov 2021 20:52:48 +0100 Subject: vdpa: Mark vdpa_config_ops.get_vq_notification as optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since vhost_vdpa_mmap checks for its existence before calling it. Signed-off-by: Eugenio Pérez Link: https://lore.kernel.org/r/20211104195248.2088904-1-eperezma@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- include/linux/vdpa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index ae34015b37b7..2b7db96bb7d3 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -157,7 +157,7 @@ struct vdpa_map_file { * @vdev: vdpa device * @idx: virtqueue index * @state: pointer to returned state (last_avail_idx) - * @get_vq_notification: Get the notification area for a virtqueue + * @get_vq_notification: Get the notification area for a virtqueue (optional) * @vdev: vdpa device * @idx: virtqueue index * Returns the notifcation area -- cgit v1.2.3 From a64917bc2e9b1e0aa716b783c4ec879fdd280300 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:33 +0200 Subject: vdpa: Provide interface to read driver features Provide an interface to read the negotiated features. This is needed when building the netlink message in vdpa_dev_net_config_fill(). Also fix the implementation of vdpa_dev_net_config_fill() to use the negotiated features instead of the device features. To make APIs clearer, make the following name changes to struct vdpa_config_ops so they better describe their operations: get_features -> get_device_features set_features -> set_driver_features Finally, add get_driver_features to return the negotiated features and add implementation to all the upstream drivers. Acked-by: Jason Wang Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-2-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2b7db96bb7d3..9cc4291a79b3 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -171,14 +171,17 @@ struct vdpa_map_file { * for the device * @vdev: vdpa device * Returns virtqueue algin requirement - * @get_features: Get virtio features supported by the device + * @get_device_features: Get virtio features supported by the device * @vdev: vdpa device * Returns the virtio features support by the * device - * @set_features: Set virtio features supported by the driver + * @set_driver_features: Set virtio features supported by the driver * @vdev: vdpa device * @features: feature support by the driver * Returns integer: success (0) or error (< 0) + * @get_driver_features: Get the virtio driver features in action + * @vdev: vdpa device + * Returns the virtio features accepted * @set_config_cb: Set the config interrupt callback * @vdev: vdpa device * @cb: virtio-vdev interrupt callback structure @@ -278,8 +281,9 @@ struct vdpa_config_ops { /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); - u64 (*get_features)(struct vdpa_device *vdev); - int (*set_features)(struct vdpa_device *vdev, u64 features); + u64 (*get_device_features)(struct vdpa_device *vdev); + int (*set_driver_features)(struct vdpa_device *vdev, u64 features); + u64 (*get_driver_features)(struct vdpa_device *vdev); void (*set_config_cb)(struct vdpa_device *vdev, struct vdpa_callback *cb); u16 (*get_vq_num_max)(struct vdpa_device *vdev); @@ -397,7 +401,7 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) const struct vdpa_config_ops *ops = vdev->config; vdev->features_valid = true; - return ops->set_features(vdev, features); + return ops->set_driver_features(vdev, features); } void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, -- cgit v1.2.3 From 73bc0dbb591baea322a7319c735e5f6c7dba9cfb Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:35 +0200 Subject: vdpa: Sync calls set/get config/status with cf_mutex Add wrappers to get/set status and protect these operations with cf_mutex to serialize these operations with respect to get/set config operations. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-4-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 9cc4291a79b3..ae047fae2603 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -408,6 +408,9 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, const void *buf, unsigned int length); +u8 vdpa_get_status(struct vdpa_device *vdev); +void vdpa_set_status(struct vdpa_device *vdev, u8 status); + /** * struct vdpa_mgmtdev_ops - vdpa device ops * @dev_add: Add a vdpa device using alloc and register -- cgit v1.2.3 From aba21aff772b8622e08f07219069be793429a48f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:37 +0200 Subject: vdpa: Allow to configure max data virtqueues Add netlink support to configure the max virtqueue pairs for a device. At least one pair is required. The maximum is dictated by the device. Example: $ vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 max_vqp 4 Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-6-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index ae047fae2603..6d4d7e4fe208 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -101,6 +101,7 @@ struct vdpa_dev_set_config { struct { u8 mac[ETH_ALEN]; u16 mtu; + u16 max_vq_pairs; } net; u64 mask; }; @@ -391,17 +392,29 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) static inline int vdpa_reset(struct vdpa_device *vdev) { const struct vdpa_config_ops *ops = vdev->config; + int ret; + mutex_lock(&vdev->cf_mutex); vdev->features_valid = false; - return ops->reset(vdev); + ret = ops->reset(vdev); + mutex_unlock(&vdev->cf_mutex); + return ret; } -static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) +static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked) { const struct vdpa_config_ops *ops = vdev->config; + int ret; + + if (!locked) + mutex_lock(&vdev->cf_mutex); vdev->features_valid = true; - return ops->set_driver_features(vdev, features); + ret = ops->set_driver_features(vdev, features); + if (!locked) + mutex_unlock(&vdev->cf_mutex); + + return ret; } void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, -- cgit v1.2.3 From 612f330ec56f12c0d099286c45f82d835845f136 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:40 +0200 Subject: vdpa: Add support for returning device configuration information Add netlink attribute to store the negotiated features. This can be used by userspace to get the current state of the vdpa instance. Examples: $ vdpa dev config show vdpa-a vdpa-a: mac 00:00:00:00:88:88 link up link_announce false max_vq_pairs 16 mtu 1500 negotiated_features CSUM GUEST_CSUM MTU MAC HOST_TSO4 HOST_TSO6 STATUS \ CTRL_VQ MQ CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM $ vdpa -j dev config show vdpa-a {"config":{"vdpa-a":{"mac":"00:00:00:00:88:88","link ":"up","link_announce":false, \ "max_vq_pairs":16,"mtu":1500,"negotiated_features":["CSUM","GUEST_CSUM","MTU","MAC", \ "HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ","CTRL_MAC_ADDR","VERSION_1", \ "ACCESS_PLATFORM"]}}} $ vdpa -jp dev config show vdpa-a { "config": { "vdpa-a": { "mac": "00:00:00:00:88:88", "link ": "up", "link_announce ": false, "max_vq_pairs": 16, "mtu": 1500, "negotiated_features": [ "CSUM","GUEST_CSUM","MTU","MAC","HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ", \ "CTRL_MAC_ADDR","VERSION_1","ACCESS_PLATFORM" ] } } } Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-9-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/uapi/linux/vdpa.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index a252f06f9dfd..db3738ef3beb 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -23,6 +23,9 @@ enum vdpa_command { enum vdpa_attr { VDPA_ATTR_UNSPEC, + /* Pad attribute for 64b alignment */ + VDPA_ATTR_PAD = VDPA_ATTR_UNSPEC, + /* bus name (optional) + dev name together make the parent device handle */ VDPA_ATTR_MGMTDEV_BUS_NAME, /* string */ VDPA_ATTR_MGMTDEV_DEV_NAME, /* string */ @@ -40,6 +43,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */ VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */ + VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.2.3 From cd2629f6df1cab5b3df34705ae7f3bde6147fce3 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 5 Jan 2022 13:46:42 +0200 Subject: vdpa: Support reporting max device capabilities Add max_supported_vqs and supported_features fields to struct vdpa_mgmt_dev. Upstream drivers need to feel these values according to the device capabilities. These values are reported back in a netlink message when showing management devices. Examples: $ auxiliary/mlx5_core.sf.1: supported_classes net max_supported_vqs 257 dev_features CSUM GUEST_CSUM MTU HOST_TSO4 HOST_TSO6 STATUS CTRL_VQ MQ \ CTRL_MAC_ADDR VERSION_1 ACCESS_PLATFORM $ vdpa -j mgmtdev show {"mgmtdev":{"auxiliary/mlx5_core.sf.1":{"supported_classes":["net"], \ "max_supported_vqs":257,"dev_features":["CSUM","GUEST_CSUM","MTU", \ "HOST_TSO4","HOST_TSO6","STATUS","CTRL_VQ","MQ","CTRL_MAC_ADDR", \ "VERSION_1","ACCESS_PLATFORM"]}}} $ vdpa -jp mgmtdev show { "mgmtdev": { "auxiliary/mlx5_core.sf.1": { "supported_classes": [ "net" ], "max_supported_vqs": 257, "dev_features": ["CSUM","GUEST_CSUM","MTU","HOST_TSO4", \ "HOST_TSO6","STATUS","CTRL_VQ","MQ", \ "CTRL_MAC_ADDR","VERSION_1","ACCESS_PLATFORM"] } } } Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220105114646.577224-11-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu --- include/linux/vdpa.h | 2 ++ include/uapi/linux/vdpa.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 6d4d7e4fe208..a6047fd6cf12 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -460,6 +460,8 @@ struct vdpa_mgmt_dev { const struct virtio_device_id *id_table; u64 config_attr_mask; struct list_head list; + u64 supported_features; + u32 max_supported_vqs; }; int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev); diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index db3738ef3beb..1061d8d2d09d 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -44,6 +44,8 @@ enum vdpa_attr { VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */ VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ + VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, /* u32 */ + VDPA_ATTR_DEV_SUPPORTED_FEATURES, /* u64 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.2.3 From f6d955d80830b6e6f6a170be68cc3628f36365dd Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 11 Jan 2022 20:33:57 +0200 Subject: vdpa: Avoid taking cf_mutex lock on get status Avoid the wrapper holding cf_mutex since it is not protecting anything. To avoid confusion and unnecessary overhead incurred by it, remove. Fixes: f489f27bc0ab ("vdpa: Sync calls set/get config/status with cf_mutex") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220111183400.38418-2-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Si-Wei Liu Acked-by: Jason Wang --- include/linux/vdpa.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index a6047fd6cf12..2de442ececae 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -421,7 +421,6 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, const void *buf, unsigned int length); -u8 vdpa_get_status(struct vdpa_device *vdev); void vdpa_set_status(struct vdpa_device *vdev, u8 status); /** -- cgit v1.2.3 From 800977f6f32e452cba6b04ef21d2f5383ca29209 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Fri, 14 Jan 2022 14:02:52 -0800 Subject: kthread: add the helper function kthread_run_on_cpu() Add a new helper function kthread_run_on_cpu(), which includes kthread_create_on_cpu/wake_up_process(). In some cases, use kthread_run_on_cpu() directly instead of kthread_create_on_node/kthread_bind/wake_up_process() or kthread_create_on_cpu/wake_up_process() or kthreadd_create/kthread_bind/wake_up_process() to simplify the code. [akpm@linux-foundation.org: export kthread_create_on_cpu to modules] Link: https://lkml.kernel.org/r/20211022025711.3673-2-caihuoqing@baidu.com Signed-off-by: Cai Huoqing Cc: Bernard Metzler Cc: Cai Huoqing Cc: Daniel Bristot de Oliveira Cc: Davidlohr Bueso Cc: Doug Ledford Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Joel Fernandes (Google) Cc: Josh Triplett Cc: Lai Jiangshan Cc: Mathieu Desnoyers Cc: "Paul E . McKenney" Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 346b0f269161..db47aae7c481 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -56,6 +56,31 @@ bool kthread_is_per_cpu(struct task_struct *k); __k; \ }) +/** + * kthread_run_on_cpu - create and wake a cpu bound thread. + * @threadfn: the function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @cpu: The cpu on which the thread should be bound, + * @namefmt: printf-style name for the thread. Format is restricted + * to "name.*%u". Code fills in cpu number. + * + * Description: Convenient wrapper for kthread_create_on_cpu() + * followed by wake_up_process(). Returns the kthread or + * ERR_PTR(-ENOMEM). + */ +static inline struct task_struct * +kthread_run_on_cpu(int (*threadfn)(void *data), void *data, + unsigned int cpu, const char *namefmt) +{ + struct task_struct *p; + + p = kthread_create_on_cpu(threadfn, data, cpu, namefmt); + if (!IS_ERR(p)) + wake_up_process(p); + + return p; +} + void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); -- cgit v1.2.3 From 60115fa54ad7b913b7cb5844e6b7ffeb842d55f2 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 14 Jan 2022 14:04:11 -0800 Subject: mm: defer kmemleak object creation of module_alloc() Yongqiang reports a kmemleak panic when module insmod/rmmod with KASAN enabled(without KASAN_VMALLOC) on x86[1]. When the module area allocates memory, it's kmemleak_object is created successfully, but the KASAN shadow memory of module allocation is not ready, so when kmemleak scan the module's pointer, it will panic due to no shadow memory with KASAN check. module_alloc __vmalloc_node_range kmemleak_vmalloc kmemleak_scan update_checksum kasan_module_alloc kmemleak_ignore Note, there is no problem if KASAN_VMALLOC enabled, the modules area entire shadow memory is preallocated. Thus, the bug only exits on ARCH which supports dynamic allocation of module area per module load, for now, only x86/arm64/s390 are involved. Add a VM_DEFER_KMEMLEAK flags, defer vmalloc'ed object register of kmemleak in module_alloc() to fix this issue. [1] https://lore.kernel.org/all/6d41e2b9-4692-5ec4-b1cd-cbe29ae89739@huawei.com/ [wangkefeng.wang@huawei.com: fix build] Link: https://lkml.kernel.org/r/20211125080307.27225-1-wangkefeng.wang@huawei.com [akpm@linux-foundation.org: simplify ifdefs, per Andrey] Link: https://lkml.kernel.org/r/CA+fCnZcnwJHUQq34VuRxpdoY6_XbJCDJ-jopksS5Eia4PijPzw@mail.gmail.com Link: https://lkml.kernel.org/r/20211124142034.192078-1-wangkefeng.wang@huawei.com Fixes: 793213a82de4 ("s390/kasan: dynamic shadow mem allocation for modules") Fixes: 39d114ddc682 ("arm64: add KASAN support") Fixes: bebf56a1b176 ("kasan: enable instrumentation of global variables") Signed-off-by: Kefeng Wang Reported-by: Yongqiang Liu Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Catalin Marinas Cc: Will Deacon Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Alexander Gordeev Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Alexander Potapenko Cc: Kefeng Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ++-- include/linux/vmalloc.h | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d8783b682669..89c99e5e67de 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -474,12 +474,12 @@ static inline void kasan_populate_early_vm_area_shadow(void *start, * allocations with real shadow memory. With KASAN vmalloc, the special * case is unnecessary, as the work is handled in the generic case. */ -int kasan_module_alloc(void *addr, size_t size); +int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask); void kasan_free_shadow(const struct vm_struct *vm); #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 6e022cc712e6..880227b9f044 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -28,6 +28,13 @@ struct notifier_block; /* in notifier.h */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ #define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */ +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) +#define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ +#else +#define VM_DEFER_KMEMLEAK 0 +#endif + /* * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. * -- cgit v1.2.3 From c4386bd8ee3a921c3c799b7197dc898ade76a453 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 14 Jan 2022 14:04:22 -0800 Subject: mm/memremap: add ZONE_DEVICE support for compound pages Add a new @vmemmap_shift property for struct dev_pagemap which specifies that a devmap is composed of a set of compound pages of order @vmemmap_shift, instead of base pages. When a compound page devmap is requested, all but the first page are initialised as tail pages instead of order-0 pages. For certain ZONE_DEVICE users like device-dax which have a fixed page size, this creates an opportunity to optimize GUP and GUP-fast walkers, treating it the same way as THP or hugetlb pages. Additionally, commit 7118fc2906e2 ("hugetlb: address ref count racing in prep_compound_gigantic_page") removed set_page_count() because the setting of page ref count to zero was redundant. devmap pages don't come from page allocator though and only head page refcount is used for compound pages, hence initialize tail page count to zero. Link: https://lkml.kernel.org/r/20211202204422.26777-5-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Dan Williams Cc: Christoph Hellwig Cc: Dave Jiang Cc: Jane Chu Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: Jonathan Corbet Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Muchun Song Cc: Naoya Horiguchi Cc: Vishal Verma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memremap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index c0e9d35889e8..61a6a0e27359 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -99,6 +99,11 @@ struct dev_pagemap_ops { * @done: completion for @internal_ref * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior + * @vmemmap_shift: structural definition of how the vmemmap page metadata + * is populated, specifically the metadata page order. + * A zero value (default) uses base pages as the vmemmap metadata + * representation. A bigger value will set up compound struct pages + * of the requested order value. * @ops: method table * @owner: an opaque pointer identifying the entity that manages this * instance. Used by various helpers to make sure that no @@ -114,6 +119,7 @@ struct dev_pagemap { struct completion done; enum memory_type type; unsigned int flags; + unsigned long vmemmap_shift; const struct dev_pagemap_ops *ops; void *owner; int nr_range; @@ -130,6 +136,11 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) return NULL; } +static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) +{ + return 1 << pgmap->vmemmap_shift; +} + #ifdef CONFIG_ZONE_DEVICE void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); -- cgit v1.2.3 From 3e9d80a891df3b1a5d77db47fa7fdf33ba71e5cb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Jan 2022 14:05:04 -0800 Subject: mm,fs: split dump_mapping() out from dump_page() dump_mapping() is a big chunk of dump_page(), and it'd be handy to be able to call it when we don't have a struct page. Split it out and move it to fs/inode.c. Take the opportunity to simplify some of the debug messages a little. Link: https://lkml.kernel.org/r/20211121121056.2870061-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski Acked-by: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bbf812ce89a8..5315fa68f751 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3152,6 +3152,7 @@ extern void unlock_new_inode(struct inode *); extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); +void dump_mapping(const struct address_space *); /* * Userspace may rely on the the inode number being non-zero. For example, glibc -- cgit v1.2.3 From b6bf9abb0aa44e53ffe9c1e6e1d32568f5b25e4a Mon Sep 17 00:00:00 2001 From: Dan Schatzberg Date: Fri, 14 Jan 2022 14:05:35 -0800 Subject: mm/memcg: add oom_group_kill memory event Our container agent wants to know when a container exits if it was OOM killed or not to report to the user. We use memory.oom.group = 1 to ensure that OOM kills within the container's cgroup kill everything. Existing memory.events are insufficient for knowing if this triggered: 1) Our current approach reads memory.events oom_kill and reports the container was killed if the value is non-zero. This is erroneous in some cases where containers create their children cgroups with memory.oom.group=1 as such OOM kills will get counted against the container cgroup's oom_kill counter despite not actually OOM killing the entire container. 2) Reading memory.events.local will fail to identify OOM kills in leaf cgroups (that don't set memory.oom.group) within the container cgroup. This patch adds a new oom_group_kill event when memory.oom.group triggers to allow userspace to cleanly identify when an entire cgroup is oom killed. [schatzberg.dan@gmail.com: changes from Johannes and Chris] Link: https://lkml.kernel.org/r/20211213162511.2492267-1-schatzberg.dan@gmail.com Link: https://lkml.kernel.org/r/20211203162426.3375036-1-schatzberg.dan@gmail.com Signed-off-by: Dan Schatzberg Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: Chris Down Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Cc: Tejun Heo Cc: Zefan Li Cc: Jonathan Corbet Cc: Vladimir Davydov Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Alex Shi Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0c5c403f4be6..951f24f42147 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -42,6 +42,7 @@ enum memcg_memory_event { MEMCG_MAX, MEMCG_OOM, MEMCG_OOM_KILL, + MEMCG_OOM_GROUP_KILL, MEMCG_SWAP_HIGH, MEMCG_SWAP_MAX, MEMCG_SWAP_FAIL, -- cgit v1.2.3 From 4e5aa1f4c2b489bc6f3ab5ca54747b18a847289d Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Fri, 14 Jan 2022 14:05:45 -0800 Subject: memcg: add per-memcg vmalloc stat The kvmalloc* allocation functions can fallback to vmalloc allocations and more often on long running machines. In addition the kernel does have __GFP_ACCOUNT kvmalloc* calls. So, often on long running machines, the memory.stat does not tell the complete picture which type of memory is charged to the memcg. So add a per-memcg vmalloc stat. [shakeelb@google.com: page_memcg() within rcu lock, per Muchun] Link: https://lkml.kernel.org/r/20211222052457.1960701-1-shakeelb@google.com [akpm@linux-foundation.org: remove cast, per Muchun] [shakeelb@google.com: remove area->page[0] checks and move to page by page accounting per Michal] Link: https://lkml.kernel.org/r/20220104222341.3972772-1-shakeelb@google.com Link: https://lkml.kernel.org/r/20211221215336.1922823-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Reviewed-by: Muchun Song Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 951f24f42147..0131e5574c88 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -33,6 +33,7 @@ enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, MEMCG_PERCPU_B, + MEMCG_VMALLOC, MEMCG_NR_STAT, }; @@ -992,6 +993,21 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, local_irq_restore(flags); } +static inline void mod_memcg_page_state(struct page *page, + int idx, int val) +{ + struct mem_cgroup *memcg; + + if (mem_cgroup_disabled()) + return; + + rcu_read_lock(); + memcg = page_memcg(page); + if (memcg) + mod_memcg_state(memcg, idx, val); + rcu_read_unlock(); +} + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { return READ_ONCE(memcg->vmstats.state[idx]); @@ -1447,6 +1463,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, { } +static inline void mod_memcg_page_state(struct page *page, + int idx, int val) +{ +} + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { return 0; -- cgit v1.2.3 From 9a10064f5625d5572c3626c1516e0bebc6c9fe9b Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Fri, 14 Jan 2022 14:05:59 -0800 Subject: mm: add a field to store names for private anonymous memory In many userspace applications, and especially in VM based applications like Android uses heavily, there are multiple different allocators in use. At a minimum there is libc malloc and the stack, and in many cases there are libc malloc, the stack, direct syscalls to mmap anonymous memory, and multiple VM heaps (one for small objects, one for big objects, etc.). Each of these layers usually has its own tools to inspect its usage; malloc by compiling a debug version, the VM through heap inspection tools, and for direct syscalls there is usually no way to track them. On Android we heavily use a set of tools that use an extended version of the logic covered in Documentation/vm/pagemap.txt to walk all pages mapped in userspace and slice their usage by process, shared (COW) vs. unique mappings, backing, etc. This can account for real physical memory usage even in cases like fork without exec (which Android uses heavily to share as many private COW pages as possible between processes), Kernel SamePage Merging, and clean zero pages. It produces a measurement of the pages that only exist in that process (USS, for unique), and a measurement of the physical memory usage of that process with the cost of shared pages being evenly split between processes that share them (PSS). If all anonymous memory is indistinguishable then figuring out the real physical memory usage (PSS) of each heap requires either a pagemap walking tool that can understand the heap debugging of every layer, or for every layer's heap debugging tools to implement the pagemap walking logic, in which case it is hard to get a consistent view of memory across the whole system. Tracking the information in userspace leads to all sorts of problems. It either needs to be stored inside the process, which means every process has to have an API to export its current heap information upon request, or it has to be stored externally in a filesystem that somebody needs to clean up on crashes. It needs to be readable while the process is still running, so it has to have some sort of synchronization with every layer of userspace. Efficiently tracking the ranges requires reimplementing something like the kernel vma trees, and linking to it from every layer of userspace. It requires more memory, more syscalls, more runtime cost, and more complexity to separately track regions that the kernel is already tracking. This patch adds a field to /proc/pid/maps and /proc/pid/smaps to show a userspace-provided name for anonymous vmas. The names of named anonymous vmas are shown in /proc/pid/maps and /proc/pid/smaps as [anon:]. Userspace can set the name for a region of memory by calling prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len, (unsigned long)name) Setting the name to NULL clears it. The name length limit is 80 bytes including NUL-terminator and is checked to contain only printable ascii characters (including space), except '[',']','\','$' and '`'. Ascii strings are being used to have a descriptive identifiers for vmas, which can be understood by the users reading /proc/pid/maps or /proc/pid/smaps. Names can be standardized for a given system and they can include some variable parts such as the name of the allocator or a library, tid of the thread using it, etc. The name is stored in a pointer in the shared union in vm_area_struct that points to a null terminated string. Anonymous vmas with the same name (equivalent strings) and are otherwise mergeable will be merged. The name pointers are not shared between vmas even if they contain the same name. The name pointer is stored in a union with fields that are only used on file-backed mappings, so it does not increase memory usage. CONFIG_ANON_VMA_NAME kernel configuration is introduced to enable this feature. It keeps the feature disabled by default to prevent any additional memory overhead and to avoid confusing procfs parsers on systems which are not ready to support named anonymous vmas. The patch is based on the original patch developed by Colin Cross, more specifically on its latest version [1] posted upstream by Sumit Semwal. It used a userspace pointer to store vma names. In that design, name pointers could be shared between vmas. However during the last upstreaming attempt, Kees Cook raised concerns [2] about this approach and suggested to copy the name into kernel memory space, perform validity checks [3] and store as a string referenced from vm_area_struct. One big concern is about fork() performance which would need to strdup anonymous vma names. Dave Hansen suggested experimenting with worst-case scenario of forking a process with 64k vmas having longest possible names [4]. I ran this experiment on an ARM64 Android device and recorded a worst-case regression of almost 40% when forking such a process. This regression is addressed in the followup patch which replaces the pointer to a name with a refcounted structure that allows sharing the name pointer between vmas of the same name. Instead of duplicating the string during fork() or when splitting a vma it increments the refcount. [1] https://lore.kernel.org/linux-mm/20200901161459.11772-4-sumit.semwal@linaro.org/ [2] https://lore.kernel.org/linux-mm/202009031031.D32EF57ED@keescook/ [3] https://lore.kernel.org/linux-mm/202009031022.3834F692@keescook/ [4] https://lore.kernel.org/linux-mm/5d0358ab-8c47-2f5f-8e43-23b89d6a8e95@intel.com/ Changes for prctl(2) manual page (in the options section): PR_SET_VMA Sets an attribute specified in arg2 for virtual memory areas starting from the address specified in arg3 and spanning the size specified in arg4. arg5 specifies the value of the attribute to be set. Note that assigning an attribute to a virtual memory area might prevent it from being merged with adjacent virtual memory areas due to the difference in that attribute's value. Currently, arg2 must be one of: PR_SET_VMA_ANON_NAME Set a name for anonymous virtual memory areas. arg5 should be a pointer to a null-terminated string containing the name. The name length including null byte cannot exceed 80 bytes. If arg5 is NULL, the name of the appropriate anonymous virtual memory areas will be reset. The name can contain only printable ascii characters (including space), except '[',']','\','$' and '`'. This feature is available only if the kernel is built with the CONFIG_ANON_VMA_NAME option enabled. [surenb@google.com: docs: proc.rst: /proc/PID/maps: fix malformed table] Link: https://lkml.kernel.org/r/20211123185928.2513763-1-surenb@google.com [surenb: rebased over v5.15-rc6, replaced userpointer with a kernel copy, added input sanitization and CONFIG_ANON_VMA_NAME config. The bulk of the work here was done by Colin Cross, therefore, with his permission, keeping him as the author] Link: https://lkml.kernel.org/r/20211019215511.3771969-2-surenb@google.com Signed-off-by: Colin Cross Signed-off-by: Suren Baghdasaryan Reviewed-by: Kees Cook Cc: Stephen Rothwell Cc: Al Viro Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: David Rientjes Cc: "Eric W. Biederman" Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jan Glauber Cc: Johannes Weiner Cc: John Stultz Cc: Mel Gorman Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Rob Landley Cc: "Serge E. Hallyn" Cc: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 13 +++++++++- include/linux/mm_types.h | 64 +++++++++++++++++++++++++++++++++++++++++++--- include/uapi/linux/prctl.h | 3 +++ 3 files changed, 75 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a7e4a9e7d807..7000442984b9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2658,7 +2658,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *, struct vm_userfaultfd_ctx); + struct mempolicy *, struct vm_userfaultfd_ctx, const char *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); @@ -3391,5 +3391,16 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) return 0; } +#ifdef CONFIG_ANON_VMA_NAME +int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, + unsigned long len_in, const char *name); +#else +static inline int +madvise_set_anon_name(struct mm_struct *mm, unsigned long start, + unsigned long len_in, const char *name) { + return 0; +} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c3a6e6209600..799e2ee626b2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -426,11 +426,19 @@ struct vm_area_struct { /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. + * + * For private anonymous mappings, a pointer to a null terminated string + * containing the name given to the vma, or NULL if unnamed. */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; + + union { + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; + /* Serialized by mmap_sem. */ + char *anon_name; + }; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma @@ -875,4 +883,52 @@ typedef struct { unsigned long val; } swp_entry_t; +#ifdef CONFIG_ANON_VMA_NAME +/* + * mmap_lock should be read-locked when calling vma_anon_name() and while using + * the returned pointer. + */ +extern const char *vma_anon_name(struct vm_area_struct *vma); + +/* + * mmap_lock should be read-locked for orig_vma->vm_mm. + * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be + * isolated. + */ +extern void dup_vma_anon_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma); + +/* + * mmap_lock should be write-locked or vma should have been isolated under + * write-locked mmap_lock protection. + */ +extern void free_vma_anon_name(struct vm_area_struct *vma); + +/* mmap_lock should be read-locked */ +static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, + const char *name) +{ + const char *vma_name = vma_anon_name(vma); + + /* either both NULL, or pointers to same string */ + if (vma_name == name) + return true; + + return name && vma_name && !strcmp(name, vma_name); +} +#else /* CONFIG_ANON_VMA_NAME */ +static inline const char *vma_anon_name(struct vm_area_struct *vma) +{ + return NULL; +} +static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) {} +static inline void free_vma_anon_name(struct vm_area_struct *vma) {} +static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, + const char *name) +{ + return true; +} +#endif /* CONFIG_ANON_VMA_NAME */ + #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index bb73e9a0b24f..e998764f0262 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -272,4 +272,7 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 +#define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 78db3412833dc9c479cd17412035f216cfd01a29 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 14 Jan 2022 14:06:03 -0800 Subject: mm: add anonymous vma name refcounting While forking a process with high number (64K) of named anonymous vmas the overhead caused by strdup() is noticeable. Experiments with ARM64 Android device show up to 40% performance regression when forking a process with 64k unpopulated anonymous vmas using the max name lengths vs the same process with the same number of anonymous vmas having no name. Introduce anon_vma_name refcounted structure to avoid the overhead of copying vma names during fork() and when splitting named anonymous vmas. When a vma is duplicated, instead of copying the name we increment the refcount of this structure. Multiple vmas can point to the same anon_vma_name as long as they increment the refcount. The name member of anon_vma_name structure is assigned at structure allocation time and is never changed. If vma name changes then the refcount of the original structure is dropped, a new anon_vma_name structure is allocated to hold the new name and the vma pointer is updated to point to the new structure. With this approach the fork() performance regressions is reduced 3-4x times and with usecases using more reasonable number of VMAs (a few thousand) the regressions is not measurable. Link: https://lkml.kernel.org/r/20211019215511.3771969-3-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Kees Cook Cc: Al Viro Cc: Colin Cross Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: David Rientjes Cc: "Eric W. Biederman" Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jan Glauber Cc: Johannes Weiner Cc: John Stultz Cc: Mel Gorman Cc: Minchan Kim Cc: Oleg Nesterov Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Rob Landley Cc: "Serge E. Hallyn" Cc: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 799e2ee626b2..449b6eafc695 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -386,6 +387,12 @@ struct vm_userfaultfd_ctx { struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ +struct anon_vma_name { + struct kref kref; + /* The name needs to be at the end because it is dynamically sized. */ + char name[]; +}; + /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -437,7 +444,7 @@ struct vm_area_struct { unsigned long rb_subtree_last; } shared; /* Serialized by mmap_sem. */ - char *anon_name; + struct anon_vma_name *anon_name; }; /* -- cgit v1.2.3 From 17fca131cee21724ee953a17c185c14e9533af5b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jan 2022 14:06:07 -0800 Subject: mm: move anon_vma declarations to linux/mm_inline.h The patch to add anonymous vma names causes a build failure in some configurations: include/linux/mm_types.h: In function 'is_same_vma_anon_name': include/linux/mm_types.h:924:37: error: implicit declaration of function 'strcmp' [-Werror=implicit-function-declaration] 924 | return name && vma_name && !strcmp(name, vma_name); | ^~~~~~ include/linux/mm_types.h:22:1: note: 'strcmp' is defined in header ''; did you forget to '#include '? This should not really be part of linux/mm_types.h in the first place, as that header is meant to only contain structure defintions and need a minimum set of indirect includes itself. While the header clearly includes more than it should at this point, let's not make it worse by including string.h as well, which would pull in the expensive (compile-speed wise) fortify-string logic. Move the new functions into a separate header that only needs to be included in a couple of locations. Link: https://lkml.kernel.org/r/20211207125710.2503446-1-arnd@kernel.org Fixes: "mm: add a field to store names for private anonymous memory" Signed-off-by: Arnd Bergmann Cc: Al Viro Cc: Colin Cross Cc: Eric Biederman Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Stephen Rothwell Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 48 --------------------------------------------- 2 files changed, 50 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index e2ec68b0515c..47d96d2647ca 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -4,6 +4,7 @@ #include #include +#include /** * folio_is_file_lru - Should the folio be on a file LRU or anon LRU? @@ -135,4 +136,53 @@ static __always_inline void del_page_from_lru_list(struct page *page, { lruvec_del_folio(lruvec, page_folio(page)); } + +#ifdef CONFIG_ANON_VMA_NAME +/* + * mmap_lock should be read-locked when calling vma_anon_name() and while using + * the returned pointer. + */ +extern const char *vma_anon_name(struct vm_area_struct *vma); + +/* + * mmap_lock should be read-locked for orig_vma->vm_mm. + * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be + * isolated. + */ +extern void dup_vma_anon_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma); + +/* + * mmap_lock should be write-locked or vma should have been isolated under + * write-locked mmap_lock protection. + */ +extern void free_vma_anon_name(struct vm_area_struct *vma); + +/* mmap_lock should be read-locked */ +static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, + const char *name) +{ + const char *vma_name = vma_anon_name(vma); + + /* either both NULL, or pointers to same string */ + if (vma_name == name) + return true; + + return name && vma_name && !strcmp(name, vma_name); +} +#else /* CONFIG_ANON_VMA_NAME */ +static inline const char *vma_anon_name(struct vm_area_struct *vma) +{ + return NULL; +} +static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) {} +static inline void free_vma_anon_name(struct vm_area_struct *vma) {} +static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, + const char *name) +{ + return true; +} +#endif /* CONFIG_ANON_VMA_NAME */ + #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 449b6eafc695..4d5fb84eed5e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -890,52 +890,4 @@ typedef struct { unsigned long val; } swp_entry_t; -#ifdef CONFIG_ANON_VMA_NAME -/* - * mmap_lock should be read-locked when calling vma_anon_name() and while using - * the returned pointer. - */ -extern const char *vma_anon_name(struct vm_area_struct *vma); - -/* - * mmap_lock should be read-locked for orig_vma->vm_mm. - * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be - * isolated. - */ -extern void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma); - -/* - * mmap_lock should be write-locked or vma should have been isolated under - * write-locked mmap_lock protection. - */ -extern void free_vma_anon_name(struct vm_area_struct *vma); - -/* mmap_lock should be read-locked */ -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) -{ - const char *vma_name = vma_anon_name(vma); - - /* either both NULL, or pointers to same string */ - if (vma_name == name) - return true; - - return name && vma_name && !strcmp(name, vma_name); -} -#else /* CONFIG_ANON_VMA_NAME */ -static inline const char *vma_anon_name(struct vm_area_struct *vma) -{ - return NULL; -} -static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma) {} -static inline void free_vma_anon_name(struct vm_area_struct *vma) {} -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) -{ - return true; -} -#endif /* CONFIG_ANON_VMA_NAME */ - #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From 36090def7bad06a6346f86a7cfdbfda2d138cb64 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jan 2022 14:06:10 -0800 Subject: mm: move tlb_flush_pending inline helpers to mm_inline.h linux/mm_types.h should only define structure definitions, to make it cheap to include elsewhere. The atomic_t helper function definitions are particularly large, so it's better to move the helpers using those into the existing linux/mm_inline.h and only include that where needed. As a follow-up, we may want to go through all the indirect includes in mm_types.h and reduce them as much as possible. Link: https://lkml.kernel.org/r/20211207125710.2503446-2-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Al Viro Cc: Stephen Rothwell Cc: Suren Baghdasaryan Cc: Colin Cross Cc: Kees Cook Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Yu Zhao Cc: Vlastimil Babka Cc: Matthew Wilcox (Oracle) Cc: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 45 ---------------- include/linux/mm_inline.h | 86 +++++++++++++++++++++++++++++++ include/linux/mm_types.h | 129 ++++++++++++++++------------------------------ 3 files changed, 131 insertions(+), 129 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7000442984b9..c17e5cfc1e47 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -424,51 +424,6 @@ extern unsigned int kobjsize(const void *objp); */ extern pgprot_t protection_map[16]; -/** - * enum fault_flag - Fault flag definitions. - * @FAULT_FLAG_WRITE: Fault was a write fault. - * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. - * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. - * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying. - * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. - * @FAULT_FLAG_TRIED: The fault has been tried once. - * @FAULT_FLAG_USER: The fault originated in userspace. - * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. - * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. - * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. - * - * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify - * whether we would allow page faults to retry by specifying these two - * fault flags correctly. Currently there can be three legal combinations: - * - * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and - * this is the first try - * - * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and - * we've already tried at least once - * - * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry - * - * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never - * be used. Note that page faults can be allowed to retry for multiple times, - * in which case we'll have an initial fault with flags (a) then later on - * continuous faults with flags (b). We should always try to detect pending - * signals before a retry to make sure the continuous page faults can still be - * interrupted if necessary. - */ -enum fault_flag { - FAULT_FLAG_WRITE = 1 << 0, - FAULT_FLAG_MKWRITE = 1 << 1, - FAULT_FLAG_ALLOW_RETRY = 1 << 2, - FAULT_FLAG_RETRY_NOWAIT = 1 << 3, - FAULT_FLAG_KILLABLE = 1 << 4, - FAULT_FLAG_TRIED = 1 << 5, - FAULT_FLAG_USER = 1 << 6, - FAULT_FLAG_REMOTE = 1 << 7, - FAULT_FLAG_INSTRUCTION = 1 << 8, - FAULT_FLAG_INTERRUPTIBLE = 1 << 9, -}; - /* * The default fault flags that should be used by most of the * arch-specific page fault handlers. diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 47d96d2647ca..b725839dfe71 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -2,6 +2,7 @@ #ifndef LINUX_MM_INLINE_H #define LINUX_MM_INLINE_H +#include #include #include #include @@ -185,4 +186,89 @@ static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, } #endif /* CONFIG_ANON_VMA_NAME */ +static inline void init_tlb_flush_pending(struct mm_struct *mm) +{ + atomic_set(&mm->tlb_flush_pending, 0); +} + +static inline void inc_tlb_flush_pending(struct mm_struct *mm) +{ + atomic_inc(&mm->tlb_flush_pending); + /* + * The only time this value is relevant is when there are indeed pages + * to flush. And we'll only flush pages after changing them, which + * requires the PTL. + * + * So the ordering here is: + * + * atomic_inc(&mm->tlb_flush_pending); + * spin_lock(&ptl); + * ... + * set_pte_at(); + * spin_unlock(&ptl); + * + * spin_lock(&ptl) + * mm_tlb_flush_pending(); + * .... + * spin_unlock(&ptl); + * + * flush_tlb_range(); + * atomic_dec(&mm->tlb_flush_pending); + * + * Where the increment if constrained by the PTL unlock, it thus + * ensures that the increment is visible if the PTE modification is + * visible. After all, if there is no PTE modification, nobody cares + * about TLB flushes either. + * + * This very much relies on users (mm_tlb_flush_pending() and + * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and + * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc + * locks (PPC) the unlock of one doesn't order against the lock of + * another PTL. + * + * The decrement is ordered by the flush_tlb_range(), such that + * mm_tlb_flush_pending() will not return false unless all flushes have + * completed. + */ +} + +static inline void dec_tlb_flush_pending(struct mm_struct *mm) +{ + /* + * See inc_tlb_flush_pending(). + * + * This cannot be smp_mb__before_atomic() because smp_mb() simply does + * not order against TLB invalidate completion, which is what we need. + * + * Therefore we must rely on tlb_flush_*() to guarantee order. + */ + atomic_dec(&mm->tlb_flush_pending); +} + +static inline bool mm_tlb_flush_pending(struct mm_struct *mm) +{ + /* + * Must be called after having acquired the PTL; orders against that + * PTLs release and therefore ensures that if we observe the modified + * PTE we must also observe the increment from inc_tlb_flush_pending(). + * + * That is, it only guarantees to return true if there is a flush + * pending for _this_ PTL. + */ + return atomic_read(&mm->tlb_flush_pending); +} + +static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +{ + /* + * Similar to mm_tlb_flush_pending(), we must have acquired the PTL + * for which there is a TLB flush pending in order to guarantee + * we've seen both that PTE modification and the increment. + * + * (no requirement on actually still holding the PTL, that is irrelevant) + */ + return atomic_read(&mm->tlb_flush_pending) > 1; +} + + #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4d5fb84eed5e..6a89f128c990 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -692,90 +692,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_finish_mmu(struct mmu_gather *tlb); -static inline void init_tlb_flush_pending(struct mm_struct *mm) -{ - atomic_set(&mm->tlb_flush_pending, 0); -} - -static inline void inc_tlb_flush_pending(struct mm_struct *mm) -{ - atomic_inc(&mm->tlb_flush_pending); - /* - * The only time this value is relevant is when there are indeed pages - * to flush. And we'll only flush pages after changing them, which - * requires the PTL. - * - * So the ordering here is: - * - * atomic_inc(&mm->tlb_flush_pending); - * spin_lock(&ptl); - * ... - * set_pte_at(); - * spin_unlock(&ptl); - * - * spin_lock(&ptl) - * mm_tlb_flush_pending(); - * .... - * spin_unlock(&ptl); - * - * flush_tlb_range(); - * atomic_dec(&mm->tlb_flush_pending); - * - * Where the increment if constrained by the PTL unlock, it thus - * ensures that the increment is visible if the PTE modification is - * visible. After all, if there is no PTE modification, nobody cares - * about TLB flushes either. - * - * This very much relies on users (mm_tlb_flush_pending() and - * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and - * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc - * locks (PPC) the unlock of one doesn't order against the lock of - * another PTL. - * - * The decrement is ordered by the flush_tlb_range(), such that - * mm_tlb_flush_pending() will not return false unless all flushes have - * completed. - */ -} - -static inline void dec_tlb_flush_pending(struct mm_struct *mm) -{ - /* - * See inc_tlb_flush_pending(). - * - * This cannot be smp_mb__before_atomic() because smp_mb() simply does - * not order against TLB invalidate completion, which is what we need. - * - * Therefore we must rely on tlb_flush_*() to guarantee order. - */ - atomic_dec(&mm->tlb_flush_pending); -} - -static inline bool mm_tlb_flush_pending(struct mm_struct *mm) -{ - /* - * Must be called after having acquired the PTL; orders against that - * PTLs release and therefore ensures that if we observe the modified - * PTE we must also observe the increment from inc_tlb_flush_pending(). - * - * That is, it only guarantees to return true if there is a flush - * pending for _this_ PTL. - */ - return atomic_read(&mm->tlb_flush_pending); -} - -static inline bool mm_tlb_flush_nested(struct mm_struct *mm) -{ - /* - * Similar to mm_tlb_flush_pending(), we must have acquired the PTL - * for which there is a TLB flush pending in order to guarantee - * we've seen both that PTE modification and the increment. - * - * (no requirement on actually still holding the PTL, that is irrelevant) - */ - return atomic_read(&mm->tlb_flush_pending) > 1; -} - struct vm_fault; /** @@ -890,4 +806,49 @@ typedef struct { unsigned long val; } swp_entry_t; +/** + * enum fault_flag - Fault flag definitions. + * @FAULT_FLAG_WRITE: Fault was a write fault. + * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. + * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. + * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying. + * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. + * @FAULT_FLAG_TRIED: The fault has been tried once. + * @FAULT_FLAG_USER: The fault originated in userspace. + * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. + * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. + * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. + * + * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify + * whether we would allow page faults to retry by specifying these two + * fault flags correctly. Currently there can be three legal combinations: + * + * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and + * this is the first try + * + * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and + * we've already tried at least once + * + * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry + * + * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never + * be used. Note that page faults can be allowed to retry for multiple times, + * in which case we'll have an initial fault with flags (a) then later on + * continuous faults with flags (b). We should always try to detect pending + * signals before a retry to make sure the continuous page faults can still be + * interrupted if necessary. + */ +enum fault_flag { + FAULT_FLAG_WRITE = 1 << 0, + FAULT_FLAG_MKWRITE = 1 << 1, + FAULT_FLAG_ALLOW_RETRY = 1 << 2, + FAULT_FLAG_RETRY_NOWAIT = 1 << 3, + FAULT_FLAG_KILLABLE = 1 << 4, + FAULT_FLAG_TRIED = 1 << 5, + FAULT_FLAG_USER = 1 << 6, + FAULT_FLAG_REMOTE = 1 << 7, + FAULT_FLAG_INSTRUCTION = 1 << 8, + FAULT_FLAG_INTERRUPTIBLE = 1 << 9, +}; + #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From cc6dcfee72509868271d42919a3c1081b6b0dc7e Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 14 Jan 2022 14:06:18 -0800 Subject: mm: document locking restrictions for vm_operations_struct::close Add comments for vm_operations_struct::close documenting locking requirements for this callback and its callers. Link: https://lkml.kernel.org/r/20211209191325.3069345-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Christian Brauner Cc: Christian Brauner Cc: Christoph Hellwig Cc: David Hildenbrand Cc: David Rientjes Cc: Florian Weimer Cc: Jan Engelhardt Cc: Jann Horn Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Minchan Kim Cc: Oleg Nesterov Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tim Murray Cc: Jason Gunthorpe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c17e5cfc1e47..4d7245e6802a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -532,6 +532,10 @@ enum page_entry_size { */ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); + /** + * @close: Called when the VMA is being removed from the MM. + * Context: User context. May sleep. Caller holds mmap_lock. + */ void (*close)(struct vm_area_struct * area); /* Called any time before splitting to check if it's allowed */ int (*may_split)(struct vm_area_struct *area, unsigned long addr); -- cgit v1.2.3 From 08d5b29eac7dd5e6c79b66d390ecbb9219e05931 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 14 Jan 2022 14:06:33 -0800 Subject: mm: ptep_clear() page table helper We have ptep_get_and_clear() and ptep_get_and_clear_full() helpers to clear PTE from user page tables, but there is no variant for simple clear of a present PTE from user page tables without using a low level pte_clear() which can be either native or para-virtualised. Add a new ptep_clear() that can be used in common code to clear PTEs from page table. We will need this call later in order to add a hook for page table check. Link: https://lkml.kernel.org/r/20211221154650.1047963-3-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Cc: Aneesh Kumar K.V Cc: Dave Hansen Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jiri Slaby Cc: Jonathan Corbet Cc: Kees Cook Cc: Masahiro Yamada Cc: Mike Rapoport Cc: Muchun Song Cc: Paul Turner Cc: Peter Zijlstra Cc: Sami Tolvanen Cc: Thomas Gleixner Cc: Wei Xu Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pgtable.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e24d2c992b11..bc8713a76e03 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -258,6 +258,14 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef __HAVE_ARCH_PTEP_CLEAR +static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_clear(mm, addr, ptep); +} +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, -- cgit v1.2.3 From df4e817b710809425d899340dbfa8504a3ca4ba5 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 14 Jan 2022 14:06:37 -0800 Subject: mm: page table check Check user page table entries at the time they are added and removed. Allows to synchronously catch memory corruption issues related to double mapping. When a pte for an anonymous page is added into page table, we verify that this pte does not already point to a file backed page, and vice versa if this is a file backed page that is being added we verify that this page does not have an anonymous mapping We also enforce that read-only sharing for anonymous pages is allowed (i.e. cow after fork). All other sharing must be for file pages. Page table check allows to protect and debug cases where "struct page" metadata became corrupted for some reason. For example, when refcnt or mapcount become invalid. Link: https://lkml.kernel.org/r/20211221154650.1047963-4-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Cc: Aneesh Kumar K.V Cc: Dave Hansen Cc: David Rientjes Cc: Frederic Weisbecker Cc: Greg Thelen Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jiri Slaby Cc: Jonathan Corbet Cc: Kees Cook Cc: Masahiro Yamada Cc: Mike Rapoport Cc: Muchun Song Cc: Paul Turner Cc: Peter Zijlstra Cc: Sami Tolvanen Cc: Thomas Gleixner Cc: Wei Xu Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_table_check.h | 147 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 include/linux/page_table_check.h (limited to 'include') diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h new file mode 100644 index 000000000000..38cace1da7b6 --- /dev/null +++ b/include/linux/page_table_check.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2021, Google LLC. + * Pasha Tatashin + */ +#ifndef __LINUX_PAGE_TABLE_CHECK_H +#define __LINUX_PAGE_TABLE_CHECK_H + +#ifdef CONFIG_PAGE_TABLE_CHECK +#include + +extern struct static_key_true page_table_check_disabled; +extern struct page_ext_operations page_table_check_ops; + +void __page_table_check_zero(struct page *page, unsigned int order); +void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t pte); +void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr, + pmd_t pmd); +void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr, + pud_t pud); +void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd); +void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud); + +static inline void page_table_check_alloc(struct page *page, unsigned int order) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_zero(page, order); +} + +static inline void page_table_check_free(struct page *page, unsigned int order) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_zero(page, order); +} + +static inline void page_table_check_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t pte) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_clear(mm, addr, pte); +} + +static inline void page_table_check_pmd_clear(struct mm_struct *mm, + unsigned long addr, pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pmd_clear(mm, addr, pmd); +} + +static inline void page_table_check_pud_clear(struct mm_struct *mm, + unsigned long addr, pud_t pud) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pud_clear(mm, addr, pud); +} + +static inline void page_table_check_pte_set(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pte_t pte) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_set(mm, addr, ptep, pte); +} + +static inline void page_table_check_pmd_set(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp, + pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pmd_set(mm, addr, pmdp, pmd); +} + +static inline void page_table_check_pud_set(struct mm_struct *mm, + unsigned long addr, pud_t *pudp, + pud_t pud) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pud_set(mm, addr, pudp, pud); +} + +#else + +static inline void page_table_check_alloc(struct page *page, unsigned int order) +{ +} + +static inline void page_table_check_free(struct page *page, unsigned int order) +{ +} + +static inline void page_table_check_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t pte) +{ +} + +static inline void page_table_check_pmd_clear(struct mm_struct *mm, + unsigned long addr, pmd_t pmd) +{ +} + +static inline void page_table_check_pud_clear(struct mm_struct *mm, + unsigned long addr, pud_t pud) +{ +} + +static inline void page_table_check_pte_set(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pte_t pte) +{ +} + +static inline void page_table_check_pmd_set(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp, + pmd_t pmd) +{ +} + +static inline void page_table_check_pud_set(struct mm_struct *mm, + unsigned long addr, pud_t *pudp, + pud_t pud) +{ +} + +#endif /* CONFIG_PAGE_TABLE_CHECK */ +#endif /* __LINUX_PAGE_TABLE_CHECK_H */ -- cgit v1.2.3 From 020e87650af9f43683546729f959fdc78422a4b7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Jan 2022 14:06:44 -0800 Subject: mm: remove last argument of reuse_swap_page() None of the callers care about the total_map_swapcount() any more. Link: https://lkml.kernel.org/r/20211220205943.456187-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Linus Torvalds Reviewed-by: William Kucharski Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index d1ea44b31f19..bdccbf1efa61 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -514,7 +514,7 @@ extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); -extern bool reuse_swap_page(struct page *, int *); +extern bool reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); @@ -680,8 +680,8 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -#define reuse_swap_page(page, total_map_swapcount) \ - (page_trans_huge_mapcount(page, total_map_swapcount) == 1) +#define reuse_swap_page(page) \ + (page_trans_huge_mapcount(page, NULL) == 1) static inline int try_to_free_swap(struct page *page) { -- cgit v1.2.3 From d08d2b62510e2407cf939e693aefd179dc114913 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Jan 2022 14:06:51 -0800 Subject: mm: remove the total_mapcount argument from page_trans_huge_mapcount() All callers pass NULL, so we can stop calculating the value we would store in it. Link: https://lkml.kernel.org/r/20211220205943.456187-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski Acked-by: Linus Torvalds Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 +++------- include/linux/swap.h | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4d7245e6802a..cef65f9cbdf2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -799,19 +799,15 @@ static inline int page_mapcount(struct page *page) #ifdef CONFIG_TRANSPARENT_HUGEPAGE int total_mapcount(struct page *page); -int page_trans_huge_mapcount(struct page *page, int *total_mapcount); +int page_trans_huge_mapcount(struct page *page); #else static inline int total_mapcount(struct page *page) { return page_mapcount(page); } -static inline int page_trans_huge_mapcount(struct page *page, - int *total_mapcount) +static inline int page_trans_huge_mapcount(struct page *page) { - int mapcount = page_mapcount(page); - if (total_mapcount) - *total_mapcount = mapcount; - return mapcount; + return page_mapcount(page); } #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index bdccbf1efa61..1d38d9475c4d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -681,7 +681,7 @@ static inline int swp_swapcount(swp_entry_t entry) } #define reuse_swap_page(page) \ - (page_trans_huge_mapcount(page, NULL) == 1) + (page_trans_huge_mapcount(page) == 1) static inline int try_to_free_swap(struct page *page) { -- cgit v1.2.3 From a421ef303008b0ceee2cfc625c3246fa7654b0ca Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 14 Jan 2022 14:07:07 -0800 Subject: mm: allow !GFP_KERNEL allocations for kvmalloc Support for GFP_NO{FS,IO} and __GFP_NOFAIL has been implemented by previous patches so we can allow the support for kvmalloc. This will allow some external users to simplify or completely remove their helpers. GFP_NOWAIT semantic hasn't been supported so far but it hasn't been explicitly documented so let's add a note about that. ceph_kvmalloc is the first helper to be dropped and changed to kvmalloc. Link: https://lkml.kernel.org/r/20211122153233.9924-5-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Uladzislau Rezki (Sony) Acked-by: Vlastimil Babka Cc: Christoph Hellwig Cc: Dave Chinner Cc: Ilya Dryomov Cc: Jeff Layton Cc: Neil Brown Cc: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ceph/libceph.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 409d8c29bc4f..309acbcb5a8a 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -295,7 +295,6 @@ extern bool libceph_compatible(void *data); extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); -extern void *ceph_kvmalloc(size_t size, gfp_t flags); struct fs_parameter; struct fc_log; -- cgit v1.2.3 From 4034247a0d6ab281ba3293798ce67af494d86129 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 14 Jan 2022 14:07:14 -0800 Subject: mm: introduce memalloc_retry_wait() Various places in the kernel - largely in filesystems - respond to a memory allocation failure by looping around and re-trying. Some of these cannot conveniently use __GFP_NOFAIL, for reasons such as: - a GFP_ATOMIC allocation, which __GFP_NOFAIL doesn't work on - a need to check for the process being signalled between failures - the possibility that other recovery actions could be performed - the allocation is quite deep in support code, and passing down an extra flag to say if __GFP_NOFAIL is wanted would be clumsy. Many of these currently use congestion_wait() which (in almost all cases) simply waits the given timeout - congestion isn't tracked for most devices. It isn't clear what the best delay is for loops, but it is clear that the various filesystems shouldn't be responsible for choosing a timeout. This patch introduces memalloc_retry_wait() with takes on that responsibility. Code that wants to retry a memory allocation can call this function passing the GFP flags that were used. It will wait however is appropriate. For now, it only considers __GFP_NORETRY and whatever gfpflags_allow_blocking() tests. If blocking is allowed without __GFP_NORETRY, then alloc_page either made some reclaim progress, or waited for a while, before failing. So there is no need for much further waiting. memalloc_retry_wait() will wait until the current jiffie ends. If this condition is not met, then alloc_page() won't have waited much if at all. In that case memalloc_retry_wait() waits about 200ms. This is the delay that most current loops uses. linux/sched/mm.h needs to be included in some files now, but linux/backing-dev.h does not. Link: https://lkml.kernel.org/r/163754371968.13692.1277530886009912421@noble.neil.brown.name Signed-off-by: NeilBrown Cc: Dave Chinner Cc: Michal Hocko Cc: "Theodore Ts'o" Cc: Jaegeuk Kim Cc: Chao Yu Cc: Darrick J. Wong Cc: Chuck Lever Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index aca874d33fe6..aa5f09ca5bcf 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -214,6 +214,32 @@ static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif +/* Any memory-allocation retry loop should use + * memalloc_retry_wait(), and pass the flags for the most + * constrained allocation attempt that might have failed. + * This provides useful documentation of where loops are, + * and a central place to fine tune the waiting as the MM + * implementation changes. + */ +static inline void memalloc_retry_wait(gfp_t gfp_flags) +{ + /* We use io_schedule_timeout because waiting for memory + * typically included waiting for dirty pages to be + * written out, which requires IO. + */ + __set_current_state(TASK_UNINTERRUPTIBLE); + gfp_flags = current_gfp_context(gfp_flags); + if (gfpflags_allow_blocking(gfp_flags) && + !(gfp_flags & __GFP_NORETRY)) + /* Probably waited already, no need for much more */ + io_schedule_timeout(1); + else + /* Probably didn't wait, and has now released a lock, + * so now is a good time to wait + */ + io_schedule_timeout(HZ/50); +} + /** * might_alloc - Mark possible allocation sites * @gfp_mask: gfp_t flags that would be used to allocate -- cgit v1.2.3 From 1611f74a94ba2e0f2d25b75008ed8e76e122097a Mon Sep 17 00:00:00 2001 From: Changcheng Deng Date: Fri, 14 Jan 2022 14:07:21 -0800 Subject: mm: fix boolreturn.cocci warning Return statements in functions returning bool should use true/false instead of 1/0. Link: https://lkml.kernel.org/r/20211126073327.74815-1-deng.changcheng@zte.com.cn Signed-off-by: Changcheng Deng Reported-by: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b5f14d581113..18423c2157e8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -383,7 +383,7 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTCLEARFLAG(uname, lname, policy) #define TESTPAGEFLAG_FALSE(uname, lname) \ -static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \ +static inline bool folio_test_##lname(const struct folio *folio) { return false; } \ static inline int Page##uname(const struct page *page) { return 0; } #define SETPAGEFLAG_NOOP(uname, lname) \ -- cgit v1.2.3 From be1a13eb51077b2ec5f7f4306f93dfece503a3f1 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 14 Jan 2022 14:07:27 -0800 Subject: mm: drop node from alloc_pages_vma alloc_pages_vma is meant to allocate a page with a vma specific memory policy. The initial node parameter is always a local node so it is pointless to waste a function argument for this. Drop the parameter. Link: https://lkml.kernel.org/r/YaSnlv4QpryEpesG@dhcp22.suse.cz Signed-off-by: Michal Hocko Cc: Aneesh Kumar K.V Cc: Ben Widawsky Cc: Dave Hansen Cc: Feng Tang Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Cc: Andi Kleen Cc: Dan Williams Cc: "Huang, Ying" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8fcc38467af6..78b58448f796 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -598,9 +598,9 @@ struct page *alloc_pages(gfp_t gfp, unsigned int order); struct folio *folio_alloc(gfp_t gfp, unsigned order); extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, - int node, bool hugepage); + bool hugepage); #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ - alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) + alloc_pages_vma(gfp_mask, order, vma, addr, true) #else static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { @@ -610,14 +610,14 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); } -#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ +#define alloc_pages_vma(gfp_mask, order, vma, addr, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) + alloc_pages_vma(gfp_mask, 0, vma, addr, false) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); -- cgit v1.2.3 From 04a536bfbd0f885338eecc2a4503dfca50ac94dd Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Fri, 14 Jan 2022 14:07:30 -0800 Subject: include/linux/gfp.h: further document GFP_DMA32 kmalloc(..., GFP_DMA32) does not return DMA32 memory because the DMA32 kmalloc cache array is not implemented. (Reason: there is no such user in kernel). Put a short comment about this so people can understand this by reading the comment. [1] https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031696.html Link: https://lkml.kernel.org/r/20211207093610.6406-1-miles.chen@mediatek.com Signed-off-by: Miles Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 78b58448f796..80f63c862be5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -302,7 +302,9 @@ struct vm_area_struct; * lowest zone as a type of emergency reserve. * * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit - * address. + * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory + * because the DMA32 kmalloc cache array is not implemented. + * (Reason: there is no such user in kernel). * * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, * do not need to be directly accessible by the kernel but that cannot -- cgit v1.2.3 From 62b3107073646e0946bd97ff926832bafb846d17 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 14 Jan 2022 14:07:37 -0800 Subject: mm_zone: add function to check if managed dma zone exists Patch series "Handle warning of allocation failure on DMA zone w/o managed pages", v4. **Problem observed: On x86_64, when crash is triggered and entering into kdump kernel, page allocation failure can always be seen. --------------------------------- DMA: preallocated 128 KiB GFP_KERNEL pool for atomic allocations swapper/0: page allocation failure: order:5, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0 CPU: 0 PID: 1 Comm: swapper/0 Call Trace: dump_stack+0x7f/0xa1 warn_alloc.cold+0x72/0xd6 ...... __alloc_pages+0x24d/0x2c0 ...... dma_atomic_pool_init+0xdb/0x176 do_one_initcall+0x67/0x320 ? rcu_read_lock_sched_held+0x3f/0x80 kernel_init_freeable+0x290/0x2dc ? rest_init+0x24f/0x24f kernel_init+0xa/0x111 ret_from_fork+0x22/0x30 Mem-Info: ------------------------------------ ***Root cause: In the current kernel, it assumes that DMA zone must have managed pages and try to request pages if CONFIG_ZONE_DMA is enabled. While this is not always true. E.g in kdump kernel of x86_64, only low 1M is presented and locked down at very early stage of boot, so that this low 1M won't be added into buddy allocator to become managed pages of DMA zone. This exception will always cause page allocation failure if page is requested from DMA zone. ***Investigation: This failure happens since below commit merged into linus's tree. 1a6a9044b967 x86/setup: Remove CONFIG_X86_RESERVE_LOW and reservelow= options 23721c8e92f7 x86/crash: Remove crash_reserve_low_1M() f1d4d47c5851 x86/setup: Always reserve the first 1M of RAM 7c321eb2b843 x86/kdump: Remove the backup region handling 6f599d84231f x86/kdump: Always reserve the low 1M when the crashkernel option is specified Before them, on x86_64, the low 640K area will be reused by kdump kernel. So in kdump kernel, the content of low 640K area is copied into a backup region for dumping before jumping into kdump. Then except of those firmware reserved region in [0, 640K], the left area will be added into buddy allocator to become available managed pages of DMA zone. However, after above commits applied, in kdump kernel of x86_64, the low 1M is reserved by memblock, but not released to buddy allocator. So any later page allocation requested from DMA zone will fail. At the beginning, if crashkernel is reserved, the low 1M need be locked down because AMD SME encrypts memory making the old backup region mechanims impossible when switching into kdump kernel. Later, it was also observed that there are BIOSes corrupting memory under 1M. To solve this, in commit f1d4d47c5851, the entire region of low 1M is always reserved after the real mode trampoline is allocated. Besides, recently, Intel engineer mentioned their TDX (Trusted domain extensions) which is under development in kernel also needs to lock down the low 1M. So we can't simply revert above commits to fix the page allocation failure from DMA zone as someone suggested. ***Solution: Currently, only DMA atomic pool and dma-kmalloc will initialize and request page allocation with GFP_DMA during bootup. So only initializ DMA atomic pool when DMA zone has available managed pages, otherwise just skip the initialization. For dma-kmalloc(), for the time being, let's mute the warning of allocation failure if requesting pages from DMA zone while no manged pages. Meanwhile, change code to use dma_alloc_xx/dma_map_xx API to replace kmalloc(GFP_DMA), or do not use GFP_DMA when calling kmalloc() if not necessary. Christoph is posting patches to fix those under drivers/scsi/. Finally, we can remove the need of dma-kmalloc() as people suggested. This patch (of 3): In some places of the current kernel, it assumes that dma zone must have managed pages if CONFIG_ZONE_DMA is enabled. While this is not always true. E.g in kdump kernel of x86_64, only low 1M is presented and locked down at very early stage of boot, so that there's no managed pages at all in DMA zone. This exception will always cause page allocation failure if page is requested from DMA zone. Here add function has_managed_dma() and the relevant helper functions to check if there's DMA zone with managed pages. It will be used in later patches. Link: https://lkml.kernel.org/r/20211223094435.248523-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20211223094435.248523-2-bhe@redhat.com Fixes: 6f599d84231f ("x86/kdump: Always reserve the low 1M when the crashkernel option is specified") Signed-off-by: Baoquan He Reviewed-by: David Hildenbrand Acked-by: John Donnelly Cc: Christoph Hellwig Cc: Christoph Lameter Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: David Laight Cc: Borislav Petkov Cc: Marek Szyprowski Cc: Robin Murphy Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 936dc0b6c226..aed44e9b5d89 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1047,6 +1047,15 @@ static inline int is_highmem_idx(enum zone_type idx) #endif } +#ifdef CONFIG_ZONE_DMA +bool has_managed_dma(void); +#else +static inline bool has_managed_dma(void) +{ + return false; +} +#endif + /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references -- cgit v1.2.3 From f47761999052b1cc987dd3e3d3adf47997358fc0 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 14 Jan 2022 14:07:48 -0800 Subject: hugetlb: add hugetlb.*.numa_stat file For hugetlb backed jobs/VMs it's critical to understand the numa information for the memory backing these jobs to deliver optimal performance. Currently this technically can be queried from /proc/self/numa_maps, but there are significant issues with that. Namely: 1. Memory can be mapped or unmapped. 2. numa_maps are per process and need to be aggregated across all processes in the cgroup. For shared memory this is more involved as the userspace needs to make sure it doesn't double count shared mappings. 3. I believe querying numa_maps needs to hold the mmap_lock which adds to the contention on this lock. For these reasons I propose simply adding hugetlb.*.numa_stat file, which shows the numa information of the cgroup similarly to memory.numa_stat. On cgroup-v2: cat /sys/fs/cgroup/unified/test/hugetlb.2MB.numa_stat total=2097152 N0=2097152 N1=0 On cgroup-v1: cat /sys/fs/cgroup/hugetlb/test/hugetlb.2MB.numa_stat total=2097152 N0=2097152 N1=0 hierarichal_total=2097152 N0=2097152 N1=0 This patch was tested manually by allocating hugetlb memory and querying the hugetlb.*.numa_stat file of the cgroup and its parents. [colin.i.king@googlemail.com: fix spelling mistake "hierarichal" -> "hierarchical"] Link: https://lkml.kernel.org/r/20211125090635.23508-1-colin.i.king@gmail.com [keescook@chromium.org: fix copy/paste array assignment] Link: https://lkml.kernel.org/r/20211203065647.2819707-1-keescook@chromium.org Link: https://lkml.kernel.org/r/20211123001020.4083653-1-almasrymina@google.com Signed-off-by: Mina Almasry Signed-off-by: Colin Ian King Signed-off-by: Kees Cook Reviewed-by: Shakeel Butt Reviewed-by: Muchun Song Reviewed-by: Mike Kravetz Cc: Shuah Khan Cc: Miaohe Lin Cc: Oscar Salvador Cc: Michal Hocko Cc: David Rientjes Cc: Jue Wang Cc: Yang Yao Cc: Joanna Li Cc: Cannon Matthews Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 ++-- include/linux/hugetlb_cgroup.h | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 00351ccb49a3..d1897a69c540 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -622,8 +622,8 @@ struct hstate { #endif #ifdef CONFIG_CGROUP_HUGETLB /* cgroup control files */ - struct cftype cgroup_files_dfl[7]; - struct cftype cgroup_files_legacy[9]; + struct cftype cgroup_files_dfl[8]; + struct cftype cgroup_files_legacy[10]; #endif char name[HSTATE_NAME_LEN]; }; diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index ba025ae27882..379344828e78 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -36,6 +36,11 @@ enum hugetlb_memory_event { HUGETLB_NR_MEMORY_EVENTS, }; +struct hugetlb_cgroup_per_node { + /* hugetlb usage in pages over all hstates. */ + unsigned long usage[HUGE_MAX_HSTATE]; +}; + struct hugetlb_cgroup { struct cgroup_subsys_state css; @@ -57,6 +62,8 @@ struct hugetlb_cgroup { /* Handle for "hugetlb.events.local" */ struct cgroup_file events_local_file[HUGE_MAX_HSTATE]; + + struct hugetlb_cgroup_per_node *nodeinfo[]; }; static inline struct hugetlb_cgroup * -- cgit v1.2.3 From e9ea874a8ffb0f8ebed4f4981531a32c5b663d79 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Fri, 14 Jan 2022 14:07:55 -0800 Subject: mm/vmstat: add events for THP max_ptes_* exceeds There are interfaces to adjust max_ptes_none, max_ptes_swap, max_ptes_shared values, see /sys/kernel/mm/transparent_hugepage/khugepaged/. But system administrator may not know which value is the best. So Add those events to support adjusting max_ptes_* to suitable values. For example, if default max_ptes_swap value causes too much failures, and system uses zram whose IO is fast, administrator could increase max_ptes_swap until THP_SCAN_EXCEED_SWAP_PTE not increase anymore. Link: https://lkml.kernel.org/r/20211225094036.574157-1-yang.yang29@zte.com.cn Signed-off-by: Yang Yang Cc: "Huang, Ying" Cc: Dave Hansen Cc: Minchan Kim Cc: Saravanan D Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index a185cc75ff52..7b2363388bfa 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -98,6 +98,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, + THP_SCAN_EXCEED_NONE_PTE, + THP_SCAN_EXCEED_SWAP_PTE, + THP_SCAN_EXCEED_SHARED_PTE, #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD THP_SPLIT_PUD, #endif -- cgit v1.2.3 From e4b424b7ec8791087375bb1f2480a3ba05d21e0b Mon Sep 17 00:00:00 2001 From: Gang Li Date: Fri, 14 Jan 2022 14:08:07 -0800 Subject: vmscan: make drop_slab_node static drop_slab_node is only used in drop_slab. So remove it's declaration from header file and add keyword static for it's definition. Link: https://lkml.kernel.org/r/20211111062445.5236-1-ligang.bdlg@bytedance.com Signed-off-by: Gang Li Reviewed-by: David Hildenbrand Reviewed-by: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index cef65f9cbdf2..eb67eb699b78 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3122,7 +3122,6 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *, #endif void drop_slab(void); -void drop_slab_node(int nid); #ifndef CONFIG_MMU #define randomize_va_space 0 -- cgit v1.2.3 From c6018b4b254971863bd0ad36bb5e7d0fa0f0ddb0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 14 Jan 2022 14:08:17 -0800 Subject: mm/mempolicy: add set_mempolicy_home_node syscall This syscall can be used to set a home node for the MPOL_BIND and MPOL_PREFERRED_MANY memory policy. Users should use this syscall after setting up a memory policy for the specified range as shown below. mbind(p, nr_pages * page_size, MPOL_BIND, new_nodes->maskp, new_nodes->size + 1, 0); sys_set_mempolicy_home_node((unsigned long)p, nr_pages * page_size, home_node, 0); The syscall allows specifying a home node/preferred node from which kernel will fulfill memory allocation requests first. For address range with MPOL_BIND memory policy, if nodemask specifies more than one node, page allocations will come from the node in the nodemask with sufficient free memory that is closest to the home node/preferred node. For MPOL_PREFERRED_MANY if the nodemask specifies more than one node, page allocation will come from the node in the nodemask with sufficient free memory that is closest to the home node/preferred node. If there is not enough memory in all the nodes specified in the nodemask, the allocation will be attempted from the closest numa node to the home node in the system. This helps applications to hint at a memory allocation preference node and fallback to _only_ a set of nodes if the memory is not available on the preferred node. Fallback allocation is attempted from the node which is nearest to the preferred node. This helps applications to have control on memory allocation numa nodes and avoids default fallback to slow memory NUMA nodes. For example a system with NUMA nodes 1,2 and 3 with DRAM memory and 10, 11 and 12 of slow memory new_nodes = numa_bitmask_alloc(nr_nodes); numa_bitmask_setbit(new_nodes, 1); numa_bitmask_setbit(new_nodes, 2); numa_bitmask_setbit(new_nodes, 3); p = mmap(NULL, nr_pages * page_size, protflag, mapflag, -1, 0); mbind(p, nr_pages * page_size, MPOL_BIND, new_nodes->maskp, new_nodes->size + 1, 0); sys_set_mempolicy_home_node(p, nr_pages * page_size, 2, 0); This will allocate from nodes closer to node 2 and will make sure the kernel will only allocate from nodes 1, 2, and 3. Memory will not be allocated from slow memory nodes 10, 11, and 12. This differs from default MPOL_BIND behavior in that with default MPOL_BIND the allocation will be attempted from node closer to the local node. One of the reasons to specify a home node is to allow allocations from cpu less NUMA node and its nearby NUMA nodes. With MPOL_PREFERRED_MANY on the other hand will first try to allocate from the closest node to node 2 from the node list 1, 2 and 3. If those nodes don't have enough memory, kernel will allocate from slow memory node 10, 11 and 12 which ever is closer to node 2. Link: https://lkml.kernel.org/r/20211202123810.267175-3-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Ben Widawsky Cc: Dave Hansen Cc: Feng Tang Cc: Michal Hocko Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Cc: Andi Kleen Cc: Dan Williams Cc: Huang Ying Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 3c7595e81150..668389b4b53d 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -46,6 +46,7 @@ struct mempolicy { unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ nodemask_t nodes; /* interleave/bind/perfer */ + int home_node; /* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */ union { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ -- cgit v1.2.3 From 21b084fdf2a49ca1634e8e360e9ab6f9ff0dee11 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 14 Jan 2022 14:08:21 -0800 Subject: mm/mempolicy: wire up syscall set_mempolicy_home_node Link: https://lkml.kernel.org/r/20211202123810.267175-4-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Ben Widawsky Cc: Dave Hansen Cc: Feng Tang Cc: Michal Hocko Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Cc: Andi Kleen Cc: Dan Williams Cc: Huang Ying Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 3 +++ include/uapi/asm-generic/unistd.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 528a478dbda8..819c0cb00b6d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1057,6 +1057,9 @@ asmlinkage long sys_landlock_add_rule(int ruleset_fd, enum landlock_rule_type ru const void __user *rule_attr, __u32 flags); asmlinkage long sys_landlock_restrict_self(int ruleset_fd, __u32 flags); asmlinkage long sys_memfd_secret(unsigned int flags); +asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long len, + unsigned long home_node, + unsigned long flags); /* * Architecture-specific system calls diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 4557a8b6086f..1c48b0ae3ba3 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) #define __NR_futex_waitv 449 __SYSCALL(__NR_futex_waitv, sys_futex_waitv) +#define __NR_set_mempolicy_home_node 450 +__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) + #undef __NR_syscalls -#define __NR_syscalls 450 +#define __NR_syscalls 451 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From 84b328aa81216e08804d8875d63f26bda1298788 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 14 Jan 2022 14:08:40 -0800 Subject: mm: compaction: fix the migration stats in trace_mm_compaction_migratepages() Now the migrate_pages() has changed to return the number of {normal page, THP, hugetlb} instead, thus we should not use the return value to calculate the number of pages migrated successfully. Instead we can just use the 'nr_succeeded' which indicates the number of normal pages migrated successfully to calculate the non-migrated pages in trace_mm_compaction_migratepages(). Link: https://lkml.kernel.org/r/b4225251c4bec068dcd90d275ab7de88a39e2bd7.1636275127.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Steven Rostedt (VMware) Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/compaction.h | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 54e5bf081171..7d48e7079e48 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -68,10 +68,9 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages, TRACE_EVENT(mm_compaction_migratepages, TP_PROTO(unsigned long nr_all, - int migrate_rc, - struct list_head *migratepages), + unsigned int nr_succeeded), - TP_ARGS(nr_all, migrate_rc, migratepages), + TP_ARGS(nr_all, nr_succeeded), TP_STRUCT__entry( __field(unsigned long, nr_migrated) @@ -79,23 +78,8 @@ TRACE_EVENT(mm_compaction_migratepages, ), TP_fast_assign( - unsigned long nr_failed = 0; - struct list_head *page_lru; - - /* - * migrate_pages() returns either a non-negative number - * with the number of pages that failed migration, or an - * error code, in which case we need to count the remaining - * pages manually - */ - if (migrate_rc >= 0) - nr_failed = migrate_rc; - else - list_for_each(page_lru, migratepages) - nr_failed++; - - __entry->nr_migrated = nr_all - nr_failed; - __entry->nr_failed = nr_failed; + __entry->nr_migrated = nr_succeeded; + __entry->nr_failed = nr_all - nr_succeeded; ), TP_printk("nr_migrated=%lu nr_failed=%lu", -- cgit v1.2.3 From c0e582de6066e97c83a466f0e5983e3148123526 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 14 Jan 2022 14:08:56 -0800 Subject: mm/thp: drop unused trace events hugepage_[invalidate|splitting] The trace events hugepage_[invalidate|splitting], were added via the commit 9e813308a5c1 ("powerpc/thp: Add tracepoints to track hugepage invalidate"). Afterwards their call sites i.e trace_hugepage_[invalidate|splitting] were just dropped off, leaving these trace points unused. Link: https://lkml.kernel.org/r/1641546351-15109-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: David Hildenbrand Cc: Steven Rostedt Cc: Ingo Molnar Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/thp.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include') diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h index d7fbbe551841..ca3f2767828a 100644 --- a/include/trace/events/thp.h +++ b/include/trace/events/thp.h @@ -8,24 +8,6 @@ #include #include -TRACE_EVENT(hugepage_invalidate, - - TP_PROTO(unsigned long addr, unsigned long pte), - TP_ARGS(addr, pte), - TP_STRUCT__entry( - __field(unsigned long, addr) - __field(unsigned long, pte) - ), - - TP_fast_assign( - __entry->addr = addr; - __entry->pte = pte; - ), - - TP_printk("hugepage invalidate at addr 0x%lx and pte = 0x%lx", - __entry->addr, __entry->pte) -); - TRACE_EVENT(hugepage_set_pmd, TP_PROTO(unsigned long addr, unsigned long pmd), @@ -65,23 +47,6 @@ TRACE_EVENT(hugepage_update, TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set) ); -TRACE_EVENT(hugepage_splitting, - - TP_PROTO(unsigned long addr, unsigned long pte), - TP_ARGS(addr, pte), - TP_STRUCT__entry( - __field(unsigned long, addr) - __field(unsigned long, pte) - ), - - TP_fast_assign( - __entry->addr = addr; - __entry->pte = pte; - ), - - TP_printk("hugepage splitting at addr 0x%lx and pte = 0x%lx", - __entry->addr, __entry->pte) -); #endif /* _TRACE_THP_H */ -- cgit v1.2.3 From c9fdc4d5487a16bd1f003fc8b66e91f88efb50e6 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 14 Jan 2022 14:09:06 -0800 Subject: mm/hwpoison: remove MF_MSG_BUDDY_2ND and MF_MSG_POISONED_HUGE These action_page_types are no longer used, so remove them. Link: https://lkml.kernel.org/r/20211115084006.3728254-3-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Acked-by: Yang Shi Cc: "Aneesh Kumar K.V" Cc: David Hildenbrand Cc: Ding Hui Cc: Miaohe Lin Cc: Michal Hocko Cc: Oscar Salvador Cc: Peter Xu Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 -- include/ras/ras_event.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index eb67eb699b78..7f594da84aca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3201,7 +3201,6 @@ enum mf_action_page_type { MF_MSG_KERNEL_HIGH_ORDER, MF_MSG_SLAB, MF_MSG_DIFFERENT_COMPOUND, - MF_MSG_POISONED_HUGE, MF_MSG_HUGE, MF_MSG_FREE_HUGE, MF_MSG_NON_PMD_HUGE, @@ -3216,7 +3215,6 @@ enum mf_action_page_type { MF_MSG_CLEAN_LRU, MF_MSG_TRUNCATED_LRU, MF_MSG_BUDDY, - MF_MSG_BUDDY_2ND, MF_MSG_DAX, MF_MSG_UNSPLIT_THP, MF_MSG_UNKNOWN, diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 0bdbc0d17d2f..d0337a41141c 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -358,7 +358,6 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ EM ( MF_MSG_SLAB, "kernel slab page" ) \ EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \ - EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \ EM ( MF_MSG_HUGE, "huge page" ) \ EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ EM ( MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page" ) \ @@ -373,7 +372,6 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \ EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \ EM ( MF_MSG_BUDDY, "free buddy page" ) \ - EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \ EM ( MF_MSG_DAX, "dax page" ) \ EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ EMe ( MF_MSG_UNKNOWN, "unknown page" ) -- cgit v1.2.3 From bf181c582588f8f7406d52f2ee228539b465f173 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 14 Jan 2022 14:09:09 -0800 Subject: mm/hwpoison: fix unpoison_memory() After recent soft-offline rework, error pages can be taken off from buddy allocator, but the existing unpoison_memory() does not properly undo the operation. Moreover, due to the recent change on __get_hwpoison_page(), get_page_unless_zero() is hardly called for hwpoisoned pages. So __get_hwpoison_page() highly likely returns -EBUSY (meaning to fail to grab page refcount) and unpoison just clears PG_hwpoison without releasing a refcount. That does not lead to a critical issue like kernel panic, but unpoisoned pages never get back to buddy (leaked permanently), which is not good. To (partially) fix this, we need to identify "taken off" pages from other types of hwpoisoned pages. We can't use refcount or page flags for this purpose, so a pseudo flag is defined by hacking ->private field. Someone might think that put_page() is enough to cancel taken-off pages, but the normal free path contains some operations not suitable for the current purpose, and can fire VM_BUG_ON(). Note that unpoison_memory() is now supposed to be cancel hwpoison events injected only by madvise() or /sys/devices/system/memory/{hard,soft}_offline_page, not by MCE injection, so please don't try to use unpoison when testing with MCE injection. [lkp@intel.com: report build failure for ARCH=i386] Link: https://lkml.kernel.org/r/20211115084006.3728254-4-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reviewed-by: Yang Shi Cc: David Hildenbrand Cc: Oscar Salvador Cc: Michal Hocko Cc: Ding Hui Cc: Tony Luck Cc: "Aneesh Kumar K.V" Cc: Miaohe Lin Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + include/linux/page-flags.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7f594da84aca..d4fb49a5d60d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3174,6 +3174,7 @@ enum mf_flags { MF_ACTION_REQUIRED = 1 << 1, MF_MUST_KILL = 1 << 2, MF_SOFT_OFFLINE = 1 << 3, + MF_UNPOISON = 1 << 4, }; extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 18423c2157e8..7e2b90dc7d3f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -522,7 +522,11 @@ PAGEFLAG_FALSE(Uncached, uncached) PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) +#define MAGIC_HWPOISON 0x48575053U /* HWPS */ +extern void SetPageHWPoisonTakenOff(struct page *page); +extern void ClearPageHWPoisonTakenOff(struct page *page); extern bool take_page_off_buddy(struct page *page); +extern bool put_page_back_buddy(struct page *page); #else PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 -- cgit v1.2.3 From 5ee2fa2f063649570c702164f47a558a3432dd9e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 14 Jan 2022 14:09:16 -0800 Subject: mm/rmap: fix potential batched TLB flush race In theory, the following race is possible for batched TLB flushing. CPU0 CPU1 ---- ---- shrink_page_list() unmap zap_pte_range() flush_tlb_batched_pending() flush_tlb_mm() try_to_unmap() set_tlb_ubc_flush_pending() mm->tlb_flush_batched = true mm->tlb_flush_batched = false After the TLB is flushed on CPU1 via flush_tlb_mm() and before mm->tlb_flush_batched is set to false, some PTE is unmapped on CPU0 and the TLB flushing is pended. Then the pended TLB flushing will be lost. Although both set_tlb_ubc_flush_pending() and flush_tlb_batched_pending() are called with PTL locked, different PTL instances may be used. Because the race window is really small, and the lost TLB flushing will cause problem only if a TLB entry is inserted before the unmapping in the race window, the race is only theoretical. But the fix is simple and cheap too. Syzbot has reported this too as follows: ================================================================== BUG: KCSAN: data-race in flush_tlb_batched_pending / try_to_unmap_one write to 0xffff8881072cfbbc of 1 bytes by task 17406 on cpu 1: flush_tlb_batched_pending+0x5f/0x80 mm/rmap.c:691 madvise_free_pte_range+0xee/0x7d0 mm/madvise.c:594 walk_pmd_range mm/pagewalk.c:128 [inline] walk_pud_range mm/pagewalk.c:205 [inline] walk_p4d_range mm/pagewalk.c:240 [inline] walk_pgd_range mm/pagewalk.c:277 [inline] __walk_page_range+0x981/0x1160 mm/pagewalk.c:379 walk_page_range+0x131/0x300 mm/pagewalk.c:475 madvise_free_single_vma mm/madvise.c:734 [inline] madvise_dontneed_free mm/madvise.c:822 [inline] madvise_vma mm/madvise.c:996 [inline] do_madvise+0xe4a/0x1140 mm/madvise.c:1202 __do_sys_madvise mm/madvise.c:1228 [inline] __se_sys_madvise mm/madvise.c:1226 [inline] __x64_sys_madvise+0x5d/0x70 mm/madvise.c:1226 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae write to 0xffff8881072cfbbc of 1 bytes by task 71 on cpu 0: set_tlb_ubc_flush_pending mm/rmap.c:636 [inline] try_to_unmap_one+0x60e/0x1220 mm/rmap.c:1515 rmap_walk_anon+0x2fb/0x470 mm/rmap.c:2301 try_to_unmap+0xec/0x110 shrink_page_list+0xe91/0x2620 mm/vmscan.c:1719 shrink_inactive_list+0x3fb/0x730 mm/vmscan.c:2394 shrink_list mm/vmscan.c:2621 [inline] shrink_lruvec+0x3c9/0x710 mm/vmscan.c:2940 shrink_node_memcgs+0x23e/0x410 mm/vmscan.c:3129 shrink_node+0x8f6/0x1190 mm/vmscan.c:3252 kswapd_shrink_node mm/vmscan.c:4022 [inline] balance_pgdat+0x702/0xd30 mm/vmscan.c:4213 kswapd+0x200/0x340 mm/vmscan.c:4473 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x01 -> 0x00 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 71 Comm: kswapd0 Not tainted 5.16.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ================================================================== [akpm@linux-foundation.org: tweak comments] Link: https://lkml.kernel.org/r/20211201021104.126469-1-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reported-by: syzbot+aa5bebed695edaccf0df@syzkaller.appspotmail.com Cc: Nadav Amit Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Dave Hansen Cc: Will Deacon Cc: Yu Zhao Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6a89f128c990..e3b0476a4fda 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -647,7 +647,7 @@ struct mm_struct { atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ - bool tlb_flush_batched; + atomic_t tlb_flush_batched; #endif struct uprobes_state uprobes_state; #ifdef CONFIG_PREEMPT_RT -- cgit v1.2.3 From cab0a7c115546a4865fb7439558af9077a569574 Mon Sep 17 00:00:00 2001 From: Ting Liu Date: Fri, 14 Jan 2022 14:09:28 -0800 Subject: mm: make some vars and functions static or __init "page_idle_ops" as a global var, but its scope of use within this document. So it should be static. "page_ext_ops" is a var used in the kernel initial phase. And other functions are aslo used in the kernel initial phase. So they should be __init or __initdata to reclaim memory. Link: https://lkml.kernel.org/r/20211217095023.67293-1-liuting.0x7c00@bytedance.com Signed-off-by: Ting Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_idle.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 83abf95e9fa7..4663dfed1293 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -13,7 +13,6 @@ * If there is not enough space to store Idle and Young bits in page flags, use * page ext flags instead. */ -extern struct page_ext_operations page_idle_ops; static inline bool folio_test_young(struct folio *folio) { -- cgit v1.2.3 From c46b0bb6a735db0b6140e12e750b5acb1b032982 Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Fri, 14 Jan 2022 14:09:37 -0800 Subject: mm/damon: add 'age' of region tracepoint support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Damon, we can get age information by analyzing the nr_access change, But short time sampling is not effective, we have to obtain enough data for analysis through long time trace, this also means that we need to consume more cpu resources and storage space. Now the region add a new 'age' variable, we only need to get the change of age value through a little time trace, for example, age has been increasing to 141, but nr_access shows a value of 0 at the same time, Through this,we can conclude that the region has a very low nr_access value for a long time. Link: https://lkml.kernel.org/r/b9def1262af95e0dc1d0caea447886434db01161.1636989871.git.xhao@linux.alibaba.com Signed-off-by: Xin Hao Reviewed-by: SeongJae Park Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/damon.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index 2f422f4f1fb9..99ffa601e351 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -22,6 +22,7 @@ TRACE_EVENT(damon_aggregated, __field(unsigned long, start) __field(unsigned long, end) __field(unsigned int, nr_accesses) + __field(unsigned int, age) ), TP_fast_assign( @@ -30,11 +31,13 @@ TRACE_EVENT(damon_aggregated, __entry->start = r->ar.start; __entry->end = r->ar.end; __entry->nr_accesses = r->nr_accesses; + __entry->age = r->age; ), - TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u", + TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u %u", __entry->target_id, __entry->nr_regions, - __entry->start, __entry->end, __entry->nr_accesses) + __entry->start, __entry->end, + __entry->nr_accesses, __entry->age) ); #endif /* _TRACE_DAMON_H */ -- cgit v1.2.3 From cdeed009f3bceee41f73f0137db785fd29a05cb8 Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Fri, 14 Jan 2022 14:09:44 -0800 Subject: mm/damon: remove some unneeded function definitions in damon.h In damon.h some func definitions about VA & PA can only be used in its own file, so there no need to define in the header file, and the header file will look cleaner. If other files later need these functions, the prototypes can be added to damon.h at that time. [sj@kernel.org: remove unnecessary function prototype position changes] Link: https://lkml.kernel.org/r/20211118114827.20052-1-sj@kernel.org Link: https://lkml.kernel.org/r/45fd5b3ef6cce8e28dbc1c92f9dc845ccfc949d7.1636989871.git.xhao@linux.alibaba.com Signed-off-by: Xin Hao Signed-off-by: SeongJae Park Reviewed-by: SeongJae Park Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index b4d4be3cc987..1d1be348f506 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -461,34 +461,13 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); #endif /* CONFIG_DAMON */ #ifdef CONFIG_DAMON_VADDR - -/* Monitoring primitives for virtual memory address spaces */ -void damon_va_init(struct damon_ctx *ctx); -void damon_va_update(struct damon_ctx *ctx); -void damon_va_prepare_access_checks(struct damon_ctx *ctx); -unsigned int damon_va_check_accesses(struct damon_ctx *ctx); bool damon_va_target_valid(void *t); -void damon_va_cleanup(struct damon_ctx *ctx); -int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t, - struct damon_region *r, struct damos *scheme); -int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t, - struct damon_region *r, struct damos *scheme); void damon_va_set_primitives(struct damon_ctx *ctx); - #endif /* CONFIG_DAMON_VADDR */ #ifdef CONFIG_DAMON_PADDR - -/* Monitoring primitives for the physical memory address space */ -void damon_pa_prepare_access_checks(struct damon_ctx *ctx); -unsigned int damon_pa_check_accesses(struct damon_ctx *ctx); bool damon_pa_target_valid(void *t); -int damon_pa_apply_scheme(struct damon_ctx *context, struct damon_target *t, - struct damon_region *r, struct damos *scheme); -int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t, - struct damon_region *r, struct damos *scheme); void damon_pa_set_primitives(struct damon_ctx *ctx); - #endif /* CONFIG_DAMON_PADDR */ #endif /* _DAMON_H */ -- cgit v1.2.3 From 9b2a38d6ef25c1748e3964b0ff30a89e4ed26583 Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Fri, 14 Jan 2022 14:09:53 -0800 Subject: mm/damon: move damon_rand() definition into damon.h damon_rand() is called in three files:damon/core.c, damon/ paddr.c, damon/vaddr.c, i think there is no need to redefine this twice, So move it to damon.h will be a good choice. Link: https://lkml.kernel.org/r/20211202075859.51341-1-xhao@linux.alibaba.com Signed-off-by: Xin Hao Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 1d1be348f506..3e91a597a1aa 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -11,12 +11,16 @@ #include #include #include +#include /* Minimal region size. Every damon_region is aligned by this. */ #define DAMON_MIN_REGION PAGE_SIZE /* Max priority score for DAMON-based operation schemes */ #define DAMOS_MAX_SCORE (99) +/* Get a random number in [l, r) */ +#define damon_rand(l, r) (l + prandom_u32_max(r - l)) + /** * struct damon_addr_range - Represents an address region of [@start, @end). * @start: Start address of the region (inclusive). -- cgit v1.2.3 From 234d68732b6c135087bdebfa0630a43ae8c27758 Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Fri, 14 Jan 2022 14:09:56 -0800 Subject: mm/damon: modify damon_rand() macro to static inline function damon_rand() cannot be implemented as a macro. Example: damon_rand(a++, b); The value of 'a' will be incremented twice, This is obviously unreasonable, So there fix it. Link: https://lkml.kernel.org/r/110ffcd4e420c86c42b41ce2bc9f0fe6a4f32cd3.1638795127.git.xhao@linux.alibaba.com Fixes: b9a6ac4e4ede ("mm/damon: adaptively adjust regions") Signed-off-by: Xin Hao Reported-by: Andrew Morton Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 3e91a597a1aa..e2c8152985b7 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -19,7 +19,10 @@ #define DAMOS_MAX_SCORE (99) /* Get a random number in [l, r) */ -#define damon_rand(l, r) (l + prandom_u32_max(r - l)) +static inline unsigned long damon_rand(unsigned long l, unsigned long r) +{ + return l + prandom_u32_max(r - l); +} /** * struct damon_addr_range - Represents an address region of [@start, @end). -- cgit v1.2.3 From 88f86dcfa454784f7de550966c60fc78a3e95d6d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 14 Jan 2022 14:09:59 -0800 Subject: mm/damon: convert macro functions to static inline functions Patch series "mm/damon: Misc cleanups". This patchset contains miscellaneous cleanups for DAMON's macro functions and documentation. This patch (of 6): This commit converts macro functions in DAMON to static inline functions, for better type checking, code documentation, etc[1]. [1] https://lore.kernel.org/linux-mm/20211202151213.6ec830863342220da4141bc5@linux-foundation.org/ Link: https://lkml.kernel.org/r/20211209131806.19317-1-sj@kernel.org Link: https://lkml.kernel.org/r/20211209131806.19317-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index e2c8152985b7..2dbc1f545da2 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -399,14 +399,20 @@ struct damon_ctx { struct list_head schemes; }; -#define damon_next_region(r) \ - (container_of(r->list.next, struct damon_region, list)) +static inline struct damon_region *damon_next_region(struct damon_region *r) +{ + return container_of(r->list.next, struct damon_region, list); +} -#define damon_prev_region(r) \ - (container_of(r->list.prev, struct damon_region, list)) +static inline struct damon_region *damon_prev_region(struct damon_region *r) +{ + return container_of(r->list.prev, struct damon_region, list); +} -#define damon_last_region(t) \ - (list_last_entry(&t->regions_list, struct damon_region, list)) +static inline struct damon_region *damon_last_region(struct damon_target *t) +{ + return list_last_entry(&t->regions_list, struct damon_region, list); +} #define damon_for_each_region(r, t) \ list_for_each_entry(r, &t->regions_list, list) -- cgit v1.2.3 From f4c6d22c6cf282ef7d24a724b9bd978ee2b74fc6 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 14 Jan 2022 14:10:14 -0800 Subject: mm/damon: remove a mistakenly added comment for a future feature Due to a mistake in patches reordering, a comment for a future feature called 'arbitrary monitoring target support'[1], which is still under development, has added. Because it only introduces confusion and we don't have a plan to post the patches soon, this commit removes the mistakenly added part. [1] https://lore.kernel.org/linux-mm/20201215115448.25633-3-sjpark@amazon.com/ Link: https://lkml.kernel.org/r/20211209131806.19317-7-sj@kernel.org Fixes: 1f366e421c8f ("mm/damon/core: implement DAMON-based Operation Schemes (DAMOS)") Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 2dbc1f545da2..97f4a224e950 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -281,7 +281,7 @@ struct damon_ctx; * as an integer in [0, &DAMOS_MAX_SCORE]. * @apply_scheme is called from @kdamond when a region for user provided * DAMON-based operation scheme is found. It should apply the scheme's action - * to the region. This is not used for &DAMON_ARBITRARY_TARGET case. + * to the region. * @target_valid should check whether the target is still valid for the * monitoring. * @cleanup is called from @kdamond just before its termination. -- cgit v1.2.3 From 0e92c2ee9f459542c5384d9cfab24873c3dd6398 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 14 Jan 2022 14:10:17 -0800 Subject: mm/damon/schemes: account scheme actions that successfully applied Patch series "mm/damon/schemes: Extend stats for better online analysis and tuning". To help online access pattern analysis and tuning of DAMON-based Operation Schemes (DAMOS), DAMOS provides simple statistics for each scheme. Introduction of DAMOS time/space quota further made the tuning easier by making the risk management easier. However, that also made understanding of the working schemes a little bit more difficult. For an example, progress of a given scheme can now be throttled by not only the aggressiveness of the target access pattern, but also the time/space quotas. So, when a scheme is showing unexpectedly slow progress, it's difficult to know by what the progress of the scheme is throttled, with currently provided statistics. This patchset extends the statistics to contain some metrics that can be helpful for such online schemes analysis and tuning (patches 1-2), exports those to users (patches 3 and 5), and add documents (patches 4 and 6). This patch (of 6): DAMON-based operation schemes (DAMOS) stats provide only the number and the amount of regions that the action of the scheme has tried to be applied. Because the action could be failed for some reasons, the currently provided information is sometimes not useful or convenient enough for schemes profiling and tuning. To improve this situation, this commit extends the DAMOS stats to provide the number and the amount of regions that the action has successfully applied. Link: https://lkml.kernel.org/r/20211210150016.35349-1-sj@kernel.org Link: https://lkml.kernel.org/r/20211210150016.35349-2-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 97f4a224e950..e0ad3d9aaeed 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -192,6 +192,20 @@ struct damos_watermarks { bool activated; }; +/** + * struct damos_stat - Statistics on a given scheme. + * @nr_tried: Total number of regions that the scheme is tried to be applied. + * @sz_tried: Total size of regions that the scheme is tried to be applied. + * @nr_applied: Total number of regions that the scheme is applied. + * @sz_applied: Total size of regions that the scheme is applied. + */ +struct damos_stat { + unsigned long nr_tried; + unsigned long sz_tried; + unsigned long nr_applied; + unsigned long sz_applied; +}; + /** * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @min_sz_region: Minimum size of target regions. @@ -203,8 +217,7 @@ struct damos_watermarks { * @action: &damo_action to be applied to the target regions. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. - * @stat_count: Total number of regions that this scheme is applied. - * @stat_sz: Total size of regions that this scheme is applied. + * @stat: Statistics of this scheme. * @list: List head for siblings. * * For each aggregation interval, DAMON finds regions which fit in the @@ -235,8 +248,7 @@ struct damos { enum damos_action action; struct damos_quota quota; struct damos_watermarks wmarks; - unsigned long stat_count; - unsigned long stat_sz; + struct damos_stat stat; struct list_head list; }; @@ -281,7 +293,8 @@ struct damon_ctx; * as an integer in [0, &DAMOS_MAX_SCORE]. * @apply_scheme is called from @kdamond when a region for user provided * DAMON-based operation scheme is found. It should apply the scheme's action - * to the region. + * to the region and return bytes of the region that the action is successfully + * applied. * @target_valid should check whether the target is still valid for the * monitoring. * @cleanup is called from @kdamond just before its termination. @@ -295,8 +308,9 @@ struct damon_primitive { int (*get_scheme_score)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, struct damos *scheme); - int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t, - struct damon_region *r, struct damos *scheme); + unsigned long (*apply_scheme)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); bool (*target_valid)(void *target); void (*cleanup)(struct damon_ctx *context); }; -- cgit v1.2.3 From 6268eac34ca30af7f6313504d556ec7fcd295621 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 14 Jan 2022 14:10:20 -0800 Subject: mm/damon/schemes: account how many times quota limit has exceeded If the time/space quotas of a given DAMON-based operation scheme is too small, the scheme could show unexpectedly slow progress. However, there is no good way to notice the case in runtime. This commit extends the DAMOS stat to provide how many times the quota limits exceeded so that the users can easily notice the case and tune the scheme. Link: https://lkml.kernel.org/r/20211210150016.35349-3-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index e0ad3d9aaeed..af648388e759 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -198,12 +198,14 @@ struct damos_watermarks { * @sz_tried: Total size of regions that the scheme is tried to be applied. * @nr_applied: Total number of regions that the scheme is applied. * @sz_applied: Total size of regions that the scheme is applied. + * @qt_exceeds: Total number of times the quota of the scheme has exceeded. */ struct damos_stat { unsigned long nr_tried; unsigned long sz_tried; unsigned long nr_applied; unsigned long sz_applied; + unsigned long qt_exceeds; }; /** -- cgit v1.2.3 From 2cd4b8e10cc31eadb5b10b1d73b3f28156f3776c Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 14 Jan 2022 14:10:38 -0800 Subject: mm/damon: move the implementation of damon_insert_region to damon.h Usually, inline function is declared static since it should sit between storage and type. And implement it in a header file if used by multiple files. And this change also fixes compile issue when backport damon to 5.10. mm/damon/vaddr.c: In function `damon_va_evenly_split_region': ./include/linux/damon.h:425:13: error: inlining failed in call to `always_inline' `damon_insert_region': function body not available 425 | inline void damon_insert_region(struct damon_region *r, | ^~~~~~~~~~~~~~~~~~~ mm/damon/vaddr.c:86:3: note: called from here 86 | damon_insert_region(n, r, next, t); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Link: https://lkml.kernel.org/r/20211223085703.6142-1-guoqing.jiang@linux.dev Signed-off-by: Guoqing Jiang Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/damon.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index af648388e759..5e1e3a128b77 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -451,9 +451,18 @@ static inline struct damon_region *damon_last_region(struct damon_target *t) #ifdef CONFIG_DAMON struct damon_region *damon_new_region(unsigned long start, unsigned long end); -inline void damon_insert_region(struct damon_region *r, + +/* + * Add a region between two other regions + */ +static inline void damon_insert_region(struct damon_region *r, struct damon_region *prev, struct damon_region *next, - struct damon_target *t); + struct damon_target *t) +{ + __list_add(&r->list, &prev->list, &next->list); + t->nr_regions++; +} + void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); -- cgit v1.2.3 From 76fd0285b447991267e838842c0be7395eb454bb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 14 Jan 2022 14:10:50 -0800 Subject: mm/damon: hide kernel pointer from tracepoint event DAMON's virtual address spaces monitoring primitive uses 'struct pid *' of the target process as its monitoring target id. The kernel address is exposed as-is to the user space via the DAMON tracepoint, 'damon_aggregated'. Though primarily only privileged users are allowed to access that, it would be better to avoid unnecessarily exposing kernel pointers so. Because the trace result is only required to be able to distinguish each target, we aren't need to use the pointer as-is. This makes the tracepoint to use the index of the target in the context's targets list as its id in the tracepoint, to hide the kernel space address. Link: https://lkml.kernel.org/r/20211229131016.23641-5-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/damon.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index 99ffa601e351..c79f1d4c39af 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -11,10 +11,10 @@ TRACE_EVENT(damon_aggregated, - TP_PROTO(struct damon_target *t, struct damon_region *r, - unsigned int nr_regions), + TP_PROTO(struct damon_target *t, unsigned int target_id, + struct damon_region *r, unsigned int nr_regions), - TP_ARGS(t, r, nr_regions), + TP_ARGS(t, target_id, r, nr_regions), TP_STRUCT__entry( __field(unsigned long, target_id) @@ -26,7 +26,7 @@ TRACE_EVENT(damon_aggregated, ), TP_fast_assign( - __entry->target_id = t->id; + __entry->target_id = target_id; __entry->nr_regions = nr_regions; __entry->start = r->ar.start; __entry->end = r->ar.end; -- cgit v1.2.3 From dea2903719283c156b53741126228c4a1b40440f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 10 Jan 2022 19:00:02 -0500 Subject: cifs: move superblock magic defitions to magic.h Help userland apps to identify cifs and smb2 mounts. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- include/uapi/linux/magic.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 35687dcb1a42..a9f4dcb7b457 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -51,6 +51,7 @@ #define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */ #define AFS_FS_MAGIC 0x6B414653 + #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ /* used by file system utilities that look at the superblock, etc. */ @@ -59,6 +60,9 @@ #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" #define SMB_SUPER_MAGIC 0x517B +#define CIFS_SUPER_MAGIC 0xFF534D42 /* the first four bytes of SMB PDUs */ +#define SMB2_SUPER_MAGIC 0xFE534D42 + #define CGROUP_SUPER_MAGIC 0x27e0eb #define CGROUP2_SUPER_MAGIC 0x63677270 -- cgit v1.2.3 From b7ec62d7ee0f0b8af6ba190501dff7f9ee6545ca Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:16:57 -0700 Subject: bitops: protect find_first_{,zero}_bit properly find_first_bit() and find_first_zero_bit() are not protected with ifdefs as other functions in find.h. It causes build errors on some platforms if CONFIG_GENERIC_FIND_FIRST_BIT is enabled. Signed-off-by: Yury Norov Fixes: 2cc7b6a44ac2 ("lib: add fast path for find_first_*_bit() and find_last_bit()") Reported-by: kernel test robot Tested-by: Wolfram Sang --- include/asm-generic/bitops/find.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 0d132ee2a291..835f959a25f2 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -97,6 +97,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, #ifdef CONFIG_GENERIC_FIND_FIRST_BIT +#ifndef find_first_bit /** * find_first_bit - find the first set bit in a memory region * @addr: The address to start the search at @@ -116,7 +117,9 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) return _find_first_bit(addr, size); } +#endif +#ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region * @addr: The address to start the search at @@ -136,6 +139,8 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) return _find_first_zero_bit(addr, size); } +#endif + #else /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifndef find_first_bit -- cgit v1.2.3 From 6b8ecb84f8f64017ae6e56cd745ad88e48f68779 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:16:58 -0700 Subject: bitops: move find_bit_*_le functions from le.h to find.h It's convenient to have all find_bit declarations in one place. Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- include/asm-generic/bitops/find.h | 69 +++++++++++++++++++++++++++++++++++++++ include/asm-generic/bitops/le.h | 64 ------------------------------------ 2 files changed, 69 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 835f959a25f2..91b1b23f2b0c 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -190,4 +190,73 @@ extern unsigned long find_next_clump8(unsigned long *clump, #define find_first_clump8(clump, bits, size) \ find_next_clump8((clump), (bits), (size), 0) +#if defined(__LITTLE_ENDIAN) + +static inline unsigned long find_next_zero_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_zero_bit(addr, size, offset); +} + +static inline unsigned long find_next_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_bit(addr, size, offset); +} + +static inline unsigned long find_first_zero_bit_le(const void *addr, + unsigned long size) +{ + return find_first_zero_bit(addr, size); +} + +#elif defined(__BIG_ENDIAN) + +#ifndef find_next_zero_bit_le +static inline +unsigned long find_next_zero_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + + return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); +} +#endif + +#ifndef find_next_bit_le +static inline +unsigned long find_next_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr, NULL, size, offset, 0UL, 1); +} +#endif + +#ifndef find_first_zero_bit_le +#define find_first_zero_bit_le(addr, size) \ + find_next_zero_bit_le((addr), (size), 0) +#endif + +#else +#error "Please fix " +#endif + #endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h index 5a28629cbf4d..d51beff60375 100644 --- a/include/asm-generic/bitops/le.h +++ b/include/asm-generic/bitops/le.h @@ -2,83 +2,19 @@ #ifndef _ASM_GENERIC_BITOPS_LE_H_ #define _ASM_GENERIC_BITOPS_LE_H_ -#include #include #include -#include #if defined(__LITTLE_ENDIAN) #define BITOP_LE_SWIZZLE 0 -static inline unsigned long find_next_zero_bit_le(const void *addr, - unsigned long size, unsigned long offset) -{ - return find_next_zero_bit(addr, size, offset); -} - -static inline unsigned long find_next_bit_le(const void *addr, - unsigned long size, unsigned long offset) -{ - return find_next_bit(addr, size, offset); -} - -static inline unsigned long find_first_zero_bit_le(const void *addr, - unsigned long size) -{ - return find_first_zero_bit(addr, size); -} - #elif defined(__BIG_ENDIAN) #define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) -#ifndef find_next_zero_bit_le -static inline -unsigned long find_next_zero_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; - - if (unlikely(offset >= size)) - return size; - - val = swab(val) | ~GENMASK(size - 1, offset); - return val == ~0UL ? size : ffz(val); - } - - return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); -} -#endif - -#ifndef find_next_bit_le -static inline -unsigned long find_next_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; - - if (unlikely(offset >= size)) - return size; - - val = swab(val) & GENMASK(size - 1, offset); - return val ? __ffs(val) : size; - } - - return _find_next_bit(addr, NULL, size, offset, 0UL, 1); -} #endif -#ifndef find_first_zero_bit_le -#define find_first_zero_bit_le(addr, size) \ - find_next_zero_bit_le((addr), (size), 0) -#endif - -#else -#error "Please fix " -#endif static inline int test_bit_le(int nr, const void *addr) { -- cgit v1.2.3 From 47d8c15615c0a2046d2d90b04cb80b81ddf31fb1 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:16:59 -0700 Subject: include: move find.h from asm_generic to linux find_bit API and bitmap API are closely related, but inclusion paths are different - include/asm-generic and include/linux, correspondingly. In the past it made a lot of troubles due to circular dependencies and/or undefined symbols. Fix this by moving find.h under include/linux. Signed-off-by: Yury Norov Tested-by: Wolfram Sang Acked-by: Geert Uytterhoeven --- include/asm-generic/bitops.h | 1 - include/asm-generic/bitops/find.h | 262 ------------------------------------- include/linux/bitmap.h | 1 + include/linux/find.h | 268 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 269 insertions(+), 263 deletions(-) delete mode 100644 include/asm-generic/bitops/find.h create mode 100644 include/linux/find.h (limited to 'include') diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h index df9b5bc3d282..a47b8a71d6fe 100644 --- a/include/asm-generic/bitops.h +++ b/include/asm-generic/bitops.h @@ -20,7 +20,6 @@ #include #include #include -#include #ifndef _LINUX_BITOPS_H #error only can be included directly diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h deleted file mode 100644 index 91b1b23f2b0c..000000000000 --- a/include/asm-generic/bitops/find.h +++ /dev/null @@ -1,262 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_GENERIC_BITOPS_FIND_H_ -#define _ASM_GENERIC_BITOPS_FIND_H_ - -extern unsigned long _find_next_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long nbits, - unsigned long start, unsigned long invert, unsigned long le); -extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); -extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); -extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); - -#ifndef find_next_bit -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - * - * Returns the bit number for the next set bit - * If no bits are set, returns @size. - */ -static inline -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val; - - if (unlikely(offset >= size)) - return size; - - val = *addr & GENMASK(size - 1, offset); - return val ? __ffs(val) : size; - } - - return _find_next_bit(addr, NULL, size, offset, 0UL, 0); -} -#endif - -#ifndef find_next_and_bit -/** - * find_next_and_bit - find the next set bit in both memory regions - * @addr1: The first address to base the search on - * @addr2: The second address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - * - * Returns the bit number for the next set bit - * If no bits are set, returns @size. - */ -static inline -unsigned long find_next_and_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long size, - unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val; - - if (unlikely(offset >= size)) - return size; - - val = *addr1 & *addr2 & GENMASK(size - 1, offset); - return val ? __ffs(val) : size; - } - - return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); -} -#endif - -#ifndef find_next_zero_bit -/** - * find_next_zero_bit - find the next cleared bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - * - * Returns the bit number of the next zero bit - * If no bits are zero, returns @size. - */ -static inline -unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val; - - if (unlikely(offset >= size)) - return size; - - val = *addr | ~GENMASK(size - 1, offset); - return val == ~0UL ? size : ffz(val); - } - - return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); -} -#endif - -#ifdef CONFIG_GENERIC_FIND_FIRST_BIT - -#ifndef find_first_bit -/** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at - * @size: The maximum number of bits to search - * - * Returns the bit number of the first set bit. - * If no bits are set, returns @size. - */ -static inline -unsigned long find_first_bit(const unsigned long *addr, unsigned long size) -{ - if (small_const_nbits(size)) { - unsigned long val = *addr & GENMASK(size - 1, 0); - - return val ? __ffs(val) : size; - } - - return _find_first_bit(addr, size); -} -#endif - -#ifndef find_first_zero_bit -/** - * find_first_zero_bit - find the first cleared bit in a memory region - * @addr: The address to start the search at - * @size: The maximum number of bits to search - * - * Returns the bit number of the first cleared bit. - * If no bits are zero, returns @size. - */ -static inline -unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) -{ - if (small_const_nbits(size)) { - unsigned long val = *addr | ~GENMASK(size - 1, 0); - - return val == ~0UL ? size : ffz(val); - } - - return _find_first_zero_bit(addr, size); -} -#endif - -#else /* CONFIG_GENERIC_FIND_FIRST_BIT */ - -#ifndef find_first_bit -#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) -#endif -#ifndef find_first_zero_bit -#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) -#endif - -#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ - -#ifndef find_last_bit -/** - * find_last_bit - find the last set bit in a memory region - * @addr: The address to start the search at - * @size: The number of bits to search - * - * Returns the bit number of the last set bit, or size. - */ -static inline -unsigned long find_last_bit(const unsigned long *addr, unsigned long size) -{ - if (small_const_nbits(size)) { - unsigned long val = *addr & GENMASK(size - 1, 0); - - return val ? __fls(val) : size; - } - - return _find_last_bit(addr, size); -} -#endif - -/** - * find_next_clump8 - find next 8-bit clump with set bits in a memory region - * @clump: location to store copy of found clump - * @addr: address to base the search on - * @size: bitmap size in number of bits - * @offset: bit offset at which to start searching - * - * Returns the bit offset for the next set clump; the found clump value is - * copied to the location pointed by @clump. If no bits are set, returns @size. - */ -extern unsigned long find_next_clump8(unsigned long *clump, - const unsigned long *addr, - unsigned long size, unsigned long offset); - -#define find_first_clump8(clump, bits, size) \ - find_next_clump8((clump), (bits), (size), 0) - -#if defined(__LITTLE_ENDIAN) - -static inline unsigned long find_next_zero_bit_le(const void *addr, - unsigned long size, unsigned long offset) -{ - return find_next_zero_bit(addr, size, offset); -} - -static inline unsigned long find_next_bit_le(const void *addr, - unsigned long size, unsigned long offset) -{ - return find_next_bit(addr, size, offset); -} - -static inline unsigned long find_first_zero_bit_le(const void *addr, - unsigned long size) -{ - return find_first_zero_bit(addr, size); -} - -#elif defined(__BIG_ENDIAN) - -#ifndef find_next_zero_bit_le -static inline -unsigned long find_next_zero_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; - - if (unlikely(offset >= size)) - return size; - - val = swab(val) | ~GENMASK(size - 1, offset); - return val == ~0UL ? size : ffz(val); - } - - return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); -} -#endif - -#ifndef find_next_bit_le -static inline -unsigned long find_next_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; - - if (unlikely(offset >= size)) - return size; - - val = swab(val) & GENMASK(size - 1, offset); - return val ? __ffs(val) : size; - } - - return _find_next_bit(addr, NULL, size, offset, 0UL, 1); -} -#endif - -#ifndef find_first_zero_bit_le -#define find_first_zero_bit_le(addr, size) \ - find_next_zero_bit_le((addr), (size), 0) -#endif - -#else -#error "Please fix " -#endif - -#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index a241dcf50f39..ead4a150bd7f 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/find.h b/include/linux/find.h new file mode 100644 index 000000000000..c5410c243e04 --- /dev/null +++ b/include/linux/find.h @@ -0,0 +1,268 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_FIND_H_ +#define __LINUX_FIND_H_ + +#ifndef __LINUX_BITMAP_H +#error only can be included directly +#endif + +#include + +extern unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert, unsigned long le); +extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); + +#ifndef find_next_bit +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr, NULL, size, offset, 0UL, 0); +} +#endif + +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr1 & *addr2 & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); +} +#endif + +#ifndef find_next_zero_bit +/** + * find_next_zero_bit - find the next cleared bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number of the next zero bit + * If no bits are zero, returns @size. + */ +static inline +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + + return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); +} +#endif + +#ifdef CONFIG_GENERIC_FIND_FIRST_BIT + +#ifndef find_first_bit +/** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum number of bits to search + * + * Returns the bit number of the first set bit. + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_bit(addr, size); +} +#endif + +#ifndef find_first_zero_bit +/** + * find_first_zero_bit - find the first cleared bit in a memory region + * @addr: The address to start the search at + * @size: The maximum number of bits to search + * + * Returns the bit number of the first cleared bit. + * If no bits are zero, returns @size. + */ +static inline +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr | ~GENMASK(size - 1, 0); + + return val == ~0UL ? size : ffz(val); + } + + return _find_first_zero_bit(addr, size); +} +#endif + +#else /* CONFIG_GENERIC_FIND_FIRST_BIT */ + +#ifndef find_first_bit +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) +#endif +#ifndef find_first_zero_bit +#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) +#endif + +#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ + +#ifndef find_last_bit +/** + * find_last_bit - find the last set bit in a memory region + * @addr: The address to start the search at + * @size: The number of bits to search + * + * Returns the bit number of the last set bit, or size. + */ +static inline +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __fls(val) : size; + } + + return _find_last_bit(addr, size); +} +#endif + +/** + * find_next_clump8 - find next 8-bit clump with set bits in a memory region + * @clump: location to store copy of found clump + * @addr: address to base the search on + * @size: bitmap size in number of bits + * @offset: bit offset at which to start searching + * + * Returns the bit offset for the next set clump; the found clump value is + * copied to the location pointed by @clump. If no bits are set, returns @size. + */ +extern unsigned long find_next_clump8(unsigned long *clump, + const unsigned long *addr, + unsigned long size, unsigned long offset); + +#define find_first_clump8(clump, bits, size) \ + find_next_clump8((clump), (bits), (size), 0) + +#if defined(__LITTLE_ENDIAN) + +static inline unsigned long find_next_zero_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_zero_bit(addr, size, offset); +} + +static inline unsigned long find_next_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_bit(addr, size, offset); +} + +static inline unsigned long find_first_zero_bit_le(const void *addr, + unsigned long size) +{ + return find_first_zero_bit(addr, size); +} + +#elif defined(__BIG_ENDIAN) + +#ifndef find_next_zero_bit_le +static inline +unsigned long find_next_zero_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + + return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); +} +#endif + +#ifndef find_next_bit_le +static inline +unsigned long find_next_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr, NULL, size, offset, 0UL, 1); +} +#endif + +#ifndef find_first_zero_bit_le +#define find_first_zero_bit_le(addr, size) \ + find_next_zero_bit_le((addr), (size), 0) +#endif + +#else +#error "Please fix " +#endif + +#endif /*__LINUX_FIND_H_ */ -- cgit v1.2.3 From c126a53c276048125b4a950072bab37ad0fea120 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:00 -0700 Subject: arch: remove GENERIC_FIND_FIRST_BIT entirely In 5.12 cycle we enabled GENERIC_FIND_FIRST_BIT config option for ARM64 and MIPS. It increased performance and shrunk .text size; and so far I didn't receive any negative feedback on the change. https://lore.kernel.org/linux-arch/20210225135700.1381396-1-yury.norov@gmail.com/ Now I think it's a good time to switch all architectures to use find_{first,last}_bit() unconditionally, and so remove corresponding config option. The patch does't introduce functioal changes for arc, arm, arm64, mips, m68k, s390 and x86, for other architectures I expect improvement both in performance and .text size. Signed-off-by: Yury Norov Tested-by: Alexander Lobakin (mips) Reviewed-by: Alexander Lobakin (mips) Reviewed-by: Andy Shevchenko Acked-by: Will Deacon Tested-by: Wolfram Sang --- include/linux/find.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/find.h b/include/linux/find.h index c5410c243e04..ea57f7f38c49 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -101,8 +101,6 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, } #endif -#ifdef CONFIG_GENERIC_FIND_FIRST_BIT - #ifndef find_first_bit /** * find_first_bit - find the first set bit in a memory region @@ -147,17 +145,6 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) } #endif -#else /* CONFIG_GENERIC_FIND_FIRST_BIT */ - -#ifndef find_first_bit -#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) -#endif -#ifndef find_first_zero_bit -#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) -#endif - -#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ - #ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory region -- cgit v1.2.3 From f68edc9297bf3f7c94abb54b9b0b053607f7587b Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:01 -0700 Subject: lib: add find_first_and_bit() Currently find_first_and_bit() is an alias to find_next_and_bit(). However, it is widely used in cpumask, so it worth to optimize it. This patch adds its own implementation for find_first_and_bit(). On x86_64 find_bit_benchmark says: Before (#define find_first_and_bit(...) find_next_and_bit(..., 0): Start testing find_bit() with random-filled bitmap [ 140.291468] find_first_and_bit: 46890919 ns, 32671 iterations Start testing find_bit() with sparse bitmap [ 140.295028] find_first_and_bit: 7103 ns, 1 iterations After: Start testing find_bit() with random-filled bitmap [ 162.574907] find_first_and_bit: 25045813 ns, 32846 iterations Start testing find_bit() with sparse bitmap [ 162.578458] find_first_and_bit: 4900 ns, 1 iterations (Thanks to Alexey Klimov for thorough testing.) Signed-off-by: Yury Norov Tested-by: Wolfram Sang Tested-by: Alexey Klimov --- include/linux/find.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/find.h b/include/linux/find.h index ea57f7f38c49..6048f8c97418 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -12,6 +12,8 @@ extern unsigned long _find_next_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start, unsigned long invert, unsigned long le); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); @@ -123,6 +125,31 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) } #endif +#ifndef find_first_and_bit +/** + * find_first_and_bit - find the first set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_and_bit(addr1, addr2, size); +} +#endif + #ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region -- cgit v1.2.3 From 93ba139ba8190c33009c5353ca43c8519443f467 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:02 -0700 Subject: cpumask: use find_first_and_bit() Now we have an efficient implementation for find_first_and_bit(), so switch cpumask to use it where appropriate. Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- include/linux/cpumask.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 1e7399fc69c0..c4e1b9ea0ba4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -123,6 +123,12 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return 0; } +static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return 0; +} + static inline unsigned int cpumask_last(const struct cpumask *srcp) { return 0; @@ -167,7 +173,7 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node) static inline int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p) { - return cpumask_next_and(-1, src1p, src2p); + return cpumask_first_and(src1p, src2p); } static inline int cpumask_any_distribute(const struct cpumask *srcp) @@ -195,6 +201,19 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } +/** + * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 + * @src1p: the first input + * @src2p: the second input + * + * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). + */ +static inline +unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) +{ + return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); +} + /** * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer @@ -585,15 +604,6 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_any(srcp) cpumask_first(srcp) -/** - * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 - * @src1p: the first input - * @src2p: the second input - * - * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). - */ -#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p)) - /** * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 * @mask1: the first input cpumask -- cgit v1.2.3 From 9b51d9d866482a703646fd4c07e433c3d9d88efd Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:05 -0700 Subject: cpumask: replace cpumask_next_* with cpumask_first_* where appropriate cpumask_first() is a more effective analogue of 'next' version if n == -1 (which means start == 0). This patch replaces 'next' with 'first' where things look trivial. There's no cpumask_first_zero() function, so create it. Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- include/linux/cpumask.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c4e1b9ea0ba4..64dae70d31f5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -123,6 +123,11 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return 0; } +static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +{ + return 0; +} + static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -201,6 +206,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } +/** + * cpumask_first_zero - get the first unset cpu in a cpumask + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if all cpus are set. + */ +static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +{ + return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + /** * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 * @src1p: the first input -- cgit v1.2.3 From bc9d6635c293a2ac30c6319f7cfd08860ab7948a Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:06 -0700 Subject: include/linux: move for_each_bit() macros from bitops.h to find.h for_each_bit() macros depend on find_bit() machinery, and so the proper place for them is the find.h header. Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- include/linux/bitops.h | 34 ---------------------------------- include/linux/find.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5e62e2383b7f..7aaed501f768 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,40 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w); */ #include -#define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) - -/* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_from(bit, addr, size) \ - for ((bit) = find_next_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) - -#define for_each_clear_bit(bit, addr, size) \ - for ((bit) = find_first_zero_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) - -/* same as for_each_clear_bit() but use bit as value to start with */ -#define for_each_clear_bit_from(bit, addr, size) \ - for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) - -/** - * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits - * @start: bit offset to start search and to store the current iteration offset - * @clump: location to store copy of current 8-bit clump - * @bits: bitmap address to base the search on - * @size: bitmap size in number of bits - */ -#define for_each_set_clump8(start, clump, bits, size) \ - for ((start) = find_first_clump8(&(clump), (bits), (size)); \ - (start) < (size); \ - (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) - static inline int get_bitmask_order(unsigned int count) { int order; diff --git a/include/linux/find.h b/include/linux/find.h index 6048f8c97418..4500e8ab93e2 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -279,4 +279,38 @@ unsigned long find_next_bit_le(const void *addr, unsigned #error "Please fix " #endif +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +#define for_each_clear_bit(bit, addr, size) \ + for ((bit) = find_first_zero_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + +/* same as for_each_clear_bit() but use bit as value to start with */ +#define for_each_clear_bit_from(bit, addr, size) \ + for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + +/** + * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits + * @start: bit offset to start search and to store the current iteration offset + * @clump: location to store copy of current 8-bit clump + * @bits: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_set_clump8(start, clump, bits, size) \ + for ((start) = find_first_clump8(&(clump), (bits), (size)); \ + (start) < (size); \ + (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) + #endif /*__LINUX_FIND_H_ */ -- cgit v1.2.3 From 7516be9931b8bc8bcaac8531f490b42ab11ded1e Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:07 -0700 Subject: find: micro-optimize for_each_{set,clear}_bit() The macros iterate thru all set/clear bits in a bitmap. They search a first bit using find_first_bit(), and the rest bits using find_next_bit(). Since find_next_bit() is called shortly after find_first_bit(), we can save few lines of I-cache by not using find_first_bit(). Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- include/linux/find.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/find.h b/include/linux/find.h index 4500e8ab93e2..ae9ed52b52b8 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -280,7 +280,7 @@ unsigned long find_next_bit_le(const void *addr, unsigned #endif #define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ + for ((bit) = find_next_bit((addr), (size), 0); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) @@ -291,7 +291,7 @@ unsigned long find_next_bit_le(const void *addr, unsigned (bit) = find_next_bit((addr), (size), (bit) + 1)) #define for_each_clear_bit(bit, addr, size) \ - for ((bit) = find_first_zero_bit((addr), (size)); \ + for ((bit) = find_next_zero_bit((addr), (size), 0); \ (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) -- cgit v1.2.3 From ec288a2cf7ca40a939316b6df206ab845bb112d1 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:11 -0700 Subject: bitmap: unify find_bit operations bitmap_for_each_{set,clear}_region() are similar to for_each_bit() macros in include/linux/find.h, but interface and implementation of them are different. This patch adds for_each_bitrange() macros and drops unused bitmap_*_region() API in sake of unification. Signed-off-by: Yury Norov Tested-by: Wolfram Sang Acked-by: Dennis Zhou Acked-by: Ulf Hansson # For MMC --- include/linux/bitmap.h | 33 ----------------------------- include/linux/find.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index ead4a150bd7f..7dba0847510c 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -55,12 +55,6 @@ struct device; * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above - * bitmap_next_clear_region(map, &start, &end, nbits) Find next clear region - * bitmap_next_set_region(map, &start, &end, nbits) Find next set region - * bitmap_for_each_clear_region(map, rs, re, start, end) - * Iterate over all clear regions - * bitmap_for_each_set_region(map, rs, re, start, end) - * Iterate over all set regions * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest @@ -467,14 +461,6 @@ static inline void bitmap_replace(unsigned long *dst, __bitmap_replace(dst, old, new, mask, nbits); } -static inline void bitmap_next_clear_region(unsigned long *bitmap, - unsigned int *rs, unsigned int *re, - unsigned int end) -{ - *rs = find_next_zero_bit(bitmap, end, *rs); - *re = find_next_bit(bitmap, end, *rs + 1); -} - static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) @@ -483,25 +469,6 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, *re = find_next_zero_bit(bitmap, end, *rs + 1); } -/* - * Bitmap region iterators. Iterates over the bitmap between [@start, @end). - * @rs and @re should be integer variables and will be set to start and end - * index of the current clear or set region. - */ -#define bitmap_for_each_clear_region(bitmap, rs, re, start, end) \ - for ((rs) = (start), \ - bitmap_next_clear_region((bitmap), &(rs), &(re), (end)); \ - (rs) < (re); \ - (rs) = (re) + 1, \ - bitmap_next_clear_region((bitmap), &(rs), &(re), (end))) - -#define bitmap_for_each_set_region(bitmap, rs, re, start, end) \ - for ((rs) = (start), \ - bitmap_next_set_region((bitmap), &(rs), &(re), (end)); \ - (rs) < (re); \ - (rs) = (re) + 1, \ - bitmap_next_set_region((bitmap), &(rs), &(re), (end))) - /** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. * @n: u64 value diff --git a/include/linux/find.h b/include/linux/find.h index ae9ed52b52b8..5bb6db213bcb 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -301,6 +301,62 @@ unsigned long find_next_bit_le(const void *addr, unsigned (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) +/** + * for_each_set_bitrange - iterate over all set bit ranges [b; e) + * @b: bit offset of start of current bitrange (first set bit) + * @e: bit offset of end of current bitrange (first unset bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_set_bitrange(b, e, addr, size) \ + for ((b) = find_next_bit((addr), (size), 0), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_bit((addr), (size), (e) + 1), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1)) + +/** + * for_each_set_bitrange_from - iterate over all set bit ranges [b; e) + * @b: bit offset of start of current bitrange (first set bit); must be initialized + * @e: bit offset of end of current bitrange (first unset bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_set_bitrange_from(b, e, addr, size) \ + for ((b) = find_next_bit((addr), (size), (b)), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_bit((addr), (size), (e) + 1), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1)) + +/** + * for_each_clear_bitrange - iterate over all unset bit ranges [b; e) + * @b: bit offset of start of current bitrange (first unset bit) + * @e: bit offset of end of current bitrange (first set bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_clear_bitrange(b, e, addr, size) \ + for ((b) = find_next_zero_bit((addr), (size), 0), \ + (e) = find_next_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_zero_bit((addr), (size), (e) + 1), \ + (e) = find_next_bit((addr), (size), (b) + 1)) + +/** + * for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e) + * @b: bit offset of start of current bitrange (first set bit); must be initialized + * @e: bit offset of end of current bitrange (first unset bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_clear_bitrange_from(b, e, addr, size) \ + for ((b) = find_next_zero_bit((addr), (size), (b)), \ + (e) = find_next_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_zero_bit((addr), (size), (e) + 1), \ + (e) = find_next_bit((addr), (size), (b) + 1)) + /** * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits * @start: bit offset to start search and to store the current iteration offset -- cgit v1.2.3 From 7372971c1be5b7d4fdd8ad237798bdc1d1d54162 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jan 2022 10:19:22 +0300 Subject: rtc: mc146818-lib: fix signedness bug in mc146818_get_time() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mc146818_get_time() function returns zero on success or negative a error code on failure. It needs to be type int. Fixes: d35786b3a28d ("rtc: mc146818-lib: change return values of mc146818_get_time()") Signed-off-by: Dan Carpenter Reviewed-by: Mateusz Jończyk Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220111071922.GE11243@kili --- include/linux/mc146818rtc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 67fb0a12becc..808bb4cee230 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -124,7 +124,7 @@ struct cmos_rtc_board_info { #endif /* ARCH_RTC_LOCATION */ bool mc146818_does_rtc_work(void); -unsigned int mc146818_get_time(struct rtc_time *time); +int mc146818_get_time(struct rtc_time *time); int mc146818_set_time(struct rtc_time *time); bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), -- cgit v1.2.3 From 3fe7fa5843d204e235d92902190fecb972a3f9cc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 10 Dec 2021 15:09:21 -0500 Subject: mm: Add folio_put_refs() This is like folio_put(), but puts N references at once instead of just one. It's like put_page_refs(), but does one atomic operation instead of two, and is available to more than just gup.c. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Reviewed-by: William Kucharski --- include/linux/mm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c768a7c81b0b..cb98f75b245e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1244,6 +1244,26 @@ static inline void folio_put(struct folio *folio) __put_page(&folio->page); } +/** + * folio_put_refs - Reduce the reference count on a folio. + * @folio: The folio. + * @refs: The amount to subtract from the folio's reference count. + * + * If the folio's reference count reaches zero, the memory will be + * released back to the page allocator and may be used by another + * allocation immediately. Do not access the memory or the struct folio + * after calling folio_put_refs() unless you can be sure that these weren't + * the last references. + * + * Context: May be called in process or interrupt context, but not in NMI + * context. May be called while holding a spinlock. + */ +static inline void folio_put_refs(struct folio *folio, int refs) +{ + if (folio_ref_sub_and_test(folio, refs)) + __put_page(&folio->page); +} + static inline void put_page(struct page *page) { struct folio *folio = page_folio(page); -- cgit v1.2.3 From a6097180d884ddab769fb25588ea8598589c218c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 17 Jan 2022 09:07:26 +1100 Subject: devtmpfs regression fix: reconfigure on each mount Prior to Linux v5.4 devtmpfs used mount_single() which treats the given mount options as "remount" options, so it updates the configuration of the single super_block on each mount. Since that was changed, the mount options used for devtmpfs are ignored. This is a regression which affect systemd - which mounts devtmpfs with "-o mode=755,size=4m,nr_inodes=1m". This patch restores the "remount" effect by calling reconfigure_single() Fixes: d401727ea0d7 ("devtmpfs: don't mix {ramfs,shmem}_fill_super() with mount_single()") Acked-by: Christian Brauner Cc: Al Viro Signed-off-by: NeilBrown Signed-off-by: Linus Torvalds --- include/linux/fs_context.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 6b54982fc5f3..13fa6f3df8e4 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -142,6 +142,8 @@ extern void put_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param); extern void fc_drop_locked(struct fs_context *fc); +int reconfigure_single(struct super_block *s, + int flags, void *data); /* * sget() wrappers to be called from the ->get_tree() op. -- cgit v1.2.3 From d8d83d8ab0a453e17e68b3a3bed1f940c34b8646 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 11 Jan 2022 14:37:41 +0100 Subject: lib/crypto: blake2s: move hmac construction into wireguard Basically nobody should use blake2s in an HMAC construction; it already has a keyed variant. But unfortunately for historical reasons, Noise, used by WireGuard, uses HKDF quite strictly, which means we have to use this. Because this really shouldn't be used by others, this commit moves it into wireguard's noise.c locally, so that kernels that aren't using WireGuard don't get this superfluous code baked in. On m68k systems, this shaves off ~314 bytes. Cc: Herbert Xu Tested-by: Geert Uytterhoeven Acked-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld --- include/crypto/blake2s.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index df3c6c2f9553..f9ffd39194eb 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -101,7 +101,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key, blake2s_final(&state, out); } -void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, - const size_t keylen); - #endif /* _CRYPTO_BLAKE2S_H */ -- cgit v1.2.3 From 90ed1e67e896cc8040a523f8428fc02f9b164394 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 12 Jan 2022 17:18:08 +0100 Subject: random: rather than entropy_store abstraction, use global Originally, the RNG used several pools, so having things abstracted out over a generic entropy_store object made sense. These days, there's only one input pool, and then an uneven mix of usage via the abstraction and usage via &input_pool. Rather than this uneasy mixture, just get rid of the abstraction entirely and have things always use the global. This simplifies the code and makes reading it a bit easier. Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- include/trace/events/random.h | 56 ++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 3d7b432ca5f3..a2d9aa16a5d7 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -28,80 +28,71 @@ TRACE_EVENT(add_device_randomness, ); DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), + TP_PROTO(int bytes, unsigned long IP), - TP_ARGS(pool_name, bytes, IP), + TP_ARGS(bytes, IP), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, bytes ) __field(unsigned long, IP ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->bytes = bytes; __entry->IP = IP; ), - TP_printk("%s pool: bytes %d caller %pS", - __entry->pool_name, __entry->bytes, (void *)__entry->IP) + TP_printk("input pool: bytes %d caller %pS", + __entry->bytes, (void *)__entry->IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), + TP_PROTO(int bytes, unsigned long IP), - TP_ARGS(pool_name, bytes, IP) + TP_ARGS(bytes, IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), + TP_PROTO(int bytes, unsigned long IP), - TP_ARGS(pool_name, bytes, IP) + TP_ARGS(bytes, IP) ); TRACE_EVENT(credit_entropy_bits, - TP_PROTO(const char *pool_name, int bits, int entropy_count, - unsigned long IP), + TP_PROTO(int bits, int entropy_count, unsigned long IP), - TP_ARGS(pool_name, bits, entropy_count, IP), + TP_ARGS(bits, entropy_count, IP), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, bits ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->bits = bits; __entry->entropy_count = entropy_count; __entry->IP = IP; ), - TP_printk("%s pool: bits %d entropy_count %d caller %pS", - __entry->pool_name, __entry->bits, - __entry->entropy_count, (void *)__entry->IP) + TP_printk("input pool: bits %d entropy_count %d caller %pS", + __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); TRACE_EVENT(debit_entropy, - TP_PROTO(const char *pool_name, int debit_bits), + TP_PROTO(int debit_bits), - TP_ARGS(pool_name, debit_bits), + TP_ARGS( debit_bits), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, debit_bits ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->debit_bits = debit_bits; ), - TP_printk("%s: debit_bits %d", __entry->pool_name, - __entry->debit_bits) + TP_printk("input pool: debit_bits %d", __entry->debit_bits) ); TRACE_EVENT(add_input_randomness, @@ -170,36 +161,31 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, ); DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), + TP_PROTO(int nbytes, int entropy_count, unsigned long IP), - TP_ARGS(pool_name, nbytes, entropy_count, IP), + TP_ARGS(nbytes, entropy_count, IP), TP_STRUCT__entry( - __field( const char *, pool_name ) __field( int, nbytes ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( - __entry->pool_name = pool_name; __entry->nbytes = nbytes; __entry->entropy_count = entropy_count; __entry->IP = IP; ), - TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", - __entry->pool_name, __entry->nbytes, __entry->entropy_count, - (void *)__entry->IP) + TP_printk("input pool: nbytes %d entropy_count %d caller %pS", + __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), + TP_PROTO(int nbytes, int entropy_count, unsigned long IP), - TP_ARGS(pool_name, nbytes, entropy_count, IP) + TP_ARGS(nbytes, entropy_count, IP) ); TRACE_EVENT(urandom_read, -- cgit v1.2.3 From be80a1d3f9dbe5aee79a325964f7037fe2d92f30 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 10 Jan 2022 14:05:49 +0000 Subject: bpf: Generalize check_ctx_reg for reuse with other types Generalize the check_ctx_reg() helper function into a more generic named one so that it can be reused for other register types as well to check whether their offset is non-zero. No functional change. Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 143401d4c9d9..e9993172f892 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -519,8 +519,8 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); -int check_ctx_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno); +int check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); -- cgit v1.2.3 From a672b2e36a648afb04ad3bda93b6bda947a479a5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Jan 2022 11:11:30 +0000 Subject: bpf: Fix ringbuf memory type confusion when passing to helpers The bpf_ringbuf_submit() and bpf_ringbuf_discard() have ARG_PTR_TO_ALLOC_MEM in their bpf_func_proto definition as their first argument, and thus both expect the result from a prior bpf_ringbuf_reserve() call which has a return type of RET_PTR_TO_ALLOC_MEM_OR_NULL. While the non-NULL memory from bpf_ringbuf_reserve() can be passed to other helpers, the two sinks (bpf_ringbuf_submit(), bpf_ringbuf_discard()) right now only enforce a register type of PTR_TO_MEM. This can lead to potential type confusion since it would allow other PTR_TO_MEM memory to be passed into the two sinks which did not come from bpf_ringbuf_reserve(). Add a new MEM_ALLOC composable type attribute for PTR_TO_MEM, and enforce that: - bpf_ringbuf_reserve() returns NULL or PTR_TO_MEM | MEM_ALLOC - bpf_ringbuf_submit() and bpf_ringbuf_discard() only take PTR_TO_MEM | MEM_ALLOC but not plain PTR_TO_MEM arguments via ARG_PTR_TO_ALLOC_MEM - however, other helpers might treat PTR_TO_MEM | MEM_ALLOC as plain PTR_TO_MEM to populate the memory area when they use ARG_PTR_TO_{UNINIT_,}MEM in their func proto description Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Reported-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Alexei Starovoitov --- include/linux/bpf.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6e947cd91152..fa517ae604ad 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -316,7 +316,12 @@ enum bpf_type_flag { */ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_RDONLY, + /* MEM was "allocated" from a different helper, and cannot be mixed + * with regular non-MEM_ALLOC'ed MEM types. + */ + MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_ALLOC, }; /* Max number of base types. */ @@ -400,7 +405,7 @@ enum bpf_return_type { RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, - RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is -- cgit v1.2.3 From e6eec09b7bc7869a49ac0ff376415bad40030ade Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 8 Dec 2021 01:52:15 +0000 Subject: KVM: Drop unused kvm_vcpu.pre_pcpu field Remove kvm_vcpu.pre_pcpu as it no longer has any users. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Message-Id: <20211208015236.1616697-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3c47b146851a..5c3c67b6318f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -309,7 +309,6 @@ struct kvm_vcpu { u64 requests; unsigned long guest_debug; - int pre_pcpu; struct list_head blocked_vcpu_list; struct mutex mutex; -- cgit v1.2.3 From 12a8eee5686ef3ea7d8db90cd664f11e4a39e349 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 8 Dec 2021 01:52:16 +0000 Subject: KVM: Move x86 VMX's posted interrupt list_head to vcpu_vmx Move the seemingly generic block_vcpu_list from kvm_vcpu to vcpu_vmx, and rename the list and all associated variables to clarify that it tracks the set of vCPU that need to be poked on a posted interrupt to the wakeup vector. The list is not used to track _all_ vCPUs that are blocking, and the term "blocked" can be misleading as it may refer to a blocking condition in the host or the guest, where as the PI wakeup case is specifically for the vCPUs that are actively blocking from within the guest. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Message-Id: <20211208015236.1616697-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c3c67b6318f..f079820f52b5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -309,8 +309,6 @@ struct kvm_vcpu { u64 requests; unsigned long guest_debug; - struct list_head blocked_vcpu_list; - struct mutex mutex; struct kvm_run *run; -- cgit v1.2.3 From 60639f74c2f4fcc3ffa2ac0b120eaa874ccc713f Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 6 Dec 2021 11:46:50 +0100 Subject: asm-generic: Prepare for riscv use of pud_alloc_one and pud_free In the following commits, riscv will almost use the generic versions of pud_alloc_one and pud_free but an additional check is required since those functions are only relevant when using at least a 4-level page table, which will be determined at runtime on riscv. So move the content of those functions into other functions that riscv can use without duplicating code. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- include/asm-generic/pgalloc.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 02932efad3ab..977bea16cf1b 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -147,6 +147,15 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 +static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + gfp_t gfp = GFP_PGTABLE_USER; + + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + return (pud_t *)get_zeroed_page(gfp); +} + #ifndef __HAVE_ARCH_PUD_ALLOC_ONE /** * pud_alloc_one - allocate a page for PUD-level page table @@ -159,20 +168,23 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) */ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - gfp_t gfp = GFP_PGTABLE_USER; - - if (mm == &init_mm) - gfp = GFP_PGTABLE_KERNEL; - return (pud_t *)get_zeroed_page(gfp); + return __pud_alloc_one(mm, addr); } #endif -static inline void pud_free(struct mm_struct *mm, pud_t *pud) +static inline void __pud_free(struct mm_struct *mm, pud_t *pud) { BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); free_page((unsigned long)pud); } +#ifndef __HAVE_ARCH_PUD_FREE +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + __pud_free(mm, pud); +} +#endif + #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #ifndef __HAVE_ARCH_PGD_FREE -- cgit v1.2.3 From 1ca3fb3abd2b615c4b61728de545760a6e2c2d8b Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 19 Jan 2022 18:07:45 -0800 Subject: mm: percpu: add pcpu_fc_cpu_to_node_fn_t typedef Add pcpu_fc_cpu_to_node_fn_t and pass it into pcpu_fc_alloc_fn_t, pcpu first chunk allocation will call it to alloc memblock on the corresponding node by it, this is prepare for the next patch. Link: https://lkml.kernel.org/r/20211216112359.103822-3-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Thomas Bogendoerfer Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Albert Ou Cc: Catalin Marinas Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index ae4004e7957e..e4078bf45fd5 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -94,8 +94,9 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR]; extern enum pcpu_fc pcpu_chosen_fc; -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, - size_t align); +typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu); +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align, + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); @@ -111,12 +112,14 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn, + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern int __init pcpu_page_first_chunk(size_t reserved_size, + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); -- cgit v1.2.3 From 23f917169ef157aa7a6bf80d8c4aad6f1282852c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 19 Jan 2022 18:07:49 -0800 Subject: mm: percpu: add generic pcpu_fc_alloc/free funciton With the previous patch, we could add a generic pcpu first chunk allocate and free function to cleanup the duplicated definations on each architecture. Link: https://lkml.kernel.org/r/20211216112359.103822-4-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Thomas Bogendoerfer Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Greg Kroah-Hartman Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Albert Ou Cc: Catalin Marinas Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: "Rafael J. Wysocki" Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e4078bf45fd5..d73c97ef4ff4 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -95,9 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR]; extern enum pcpu_fc pcpu_chosen_fc; typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu); -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, size_t align, - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn); -typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); @@ -112,16 +109,12 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn, - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn); + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -- cgit v1.2.3 From 20c035764626c56c4f6514936b9ee4be0f4cd962 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 19 Jan 2022 18:07:53 -0800 Subject: mm: percpu: add generic pcpu_populate_pte() function With NEED_PER_CPU_PAGE_FIRST_CHUNK enabled, we need a function to populate pte, this patch adds a generic pcpu populate pte function, pcpu_populate_pte(), which is marked __weak and used on most architectures, but it is overridden on x86, which has its own implementation. Link: https://lkml.kernel.org/r/20211216112359.103822-5-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Albert Ou Cc: Catalin Marinas Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Thomas Bogendoerfer Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d73c97ef4ff4..f1ec5ad1351c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR]; extern enum pcpu_fc pcpu_chosen_fc; typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu); -typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, @@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK +void __init pcpu_populate_pte(unsigned long addr); extern int __init pcpu_page_first_chunk(size_t reserved_size, - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn); + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn); #endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1); -- cgit v1.2.3 From ae62fbe299629d3b2fa61d4cf5146258c4d99fdf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 19 Jan 2022 18:08:00 -0800 Subject: proc: make the proc_create[_data]() stubs static inlines Change the proc_create[_data]() stubs which are used when CONFIG_PROC_FS is not set from #defines to a static inline stubs. This should fix clang -Werror builds failing due to errors like this: drivers/platform/x86/thinkpad_acpi.c:918:30: error: unused variable 'dispatch_proc_ops' [-Werror,-Wunused-const-variable] Fixing this in include/linux/proc_fs.h should ensure that the same issue is also fixed in any other drivers hitting the same -Werror issue. [akpm@linux-foundation.org: fix CONFIG_PROC_FS=n] [akpm@linux-foundation.org: fix arch/sparc/kernel/led.c] [akpm@linux-foundation.org: fix build] Link: https://lkml.kernel.org/r/20211116131112.508304-1-hdegoede@redhat.com Signed-off-by: Hans de Goede Reported-by: kernel test robot Acked-by: Christian Brauner Cc: Alexander Viro Cc: Hans de Goede Cc: David Howells Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 069c7fd95396..01b9268451a8 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -178,8 +178,16 @@ static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, #define proc_create_seq(name, mode, parent, ops) ({NULL;}) #define proc_create_single(name, mode, parent, show) ({NULL;}) #define proc_create_single_data(name, mode, parent, show, data) ({NULL;}) -#define proc_create(name, mode, parent, proc_ops) ({NULL;}) -#define proc_create_data(name, mode, parent, proc_ops, data) ({NULL;}) + +static inline struct proc_dir_entry * +proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, + const struct proc_ops *proc_ops) +{ return NULL; } + +static inline struct proc_dir_entry * +proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, + const struct proc_ops *proc_ops, void *data) +{ return NULL; } static inline void proc_set_size(struct proc_dir_entry *de, loff_t size) {} static inline void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) {} -- cgit v1.2.3 From 22c033989c3eb9731ad0c497dfab4231b8e367d6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 Jan 2022 18:08:12 -0800 Subject: include/linux/unaligned: replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. The rest of the changes are induced by the above and may not be split. Link: https://lkml.kernel.org/r/20211209123823.20425-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Arend van Spriel [brcmfmac] Acked-by: Kalle Valo Cc: Arend van Spriel Cc: Franky Lin Cc: Hante Meuleman Cc: Chi-hsien Lin Cc: Wright Feng Cc: Chung-hsien Hsu Cc: Kalle Valo Cc: David S. Miller Cc: Jakub Kicinski Cc: Heikki Krogerus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/unaligned/packed_struct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h index c0d817de4df2..f4c8eaf4d012 100644 --- a/include/linux/unaligned/packed_struct.h +++ b/include/linux/unaligned/packed_struct.h @@ -1,7 +1,7 @@ #ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H #define _LINUX_UNALIGNED_PACKED_STRUCT_H -#include +#include struct __una_u16 { u16 x; } __packed; struct __una_u32 { u32 x; } __packed; -- cgit v1.2.3 From 40cbf09f060c8febef64541c463d4dd526abe445 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 Jan 2022 18:08:16 -0800 Subject: kernel.h: include a note to discourage people from including it in headers Include a note at the top to discourage people from including it in headers. Link: https://lkml.kernel.org/r/20211209150803.4473-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 77755ac3e189..36a612d82956 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -1,4 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * NOTE: + * + * This header has combined a lot of unrelated to each other stuff. + * The process of splitting its content is in progress while keeping + * backward compatibility. That's why it's highly recommended NOT to + * include this header inside another header file, especially under + * generic or architectural include/ directory. + */ #ifndef _LINUX_KERNEL_H #define _LINUX_KERNEL_H -- cgit v1.2.3 From 95af469c4f609de011debc08e7a35b45201623a8 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 19 Jan 2022 18:08:29 -0800 Subject: fs/binfmt_elf: replace open-coded string copy with get_task_comm It is better to use get_task_comm() instead of the open coded string copy as we do in other places. struct elf_prpsinfo is used to dump the task information in userspace coredump or kernel vmcore. Below is the verification of vmcore, crash> ps PID PPID CPU TASK ST %MEM VSZ RSS COMM 0 0 0 ffffffff9d21a940 RU 0.0 0 0 [swapper/0] > 0 0 1 ffffa09e40f85e80 RU 0.0 0 0 [swapper/1] > 0 0 2 ffffa09e40f81f80 RU 0.0 0 0 [swapper/2] > 0 0 3 ffffa09e40f83f00 RU 0.0 0 0 [swapper/3] > 0 0 4 ffffa09e40f80000 RU 0.0 0 0 [swapper/4] > 0 0 5 ffffa09e40f89f80 RU 0.0 0 0 [swapper/5] 0 0 6 ffffa09e40f8bf00 RU 0.0 0 0 [swapper/6] > 0 0 7 ffffa09e40f88000 RU 0.0 0 0 [swapper/7] > 0 0 8 ffffa09e40f8de80 RU 0.0 0 0 [swapper/8] > 0 0 9 ffffa09e40f95e80 RU 0.0 0 0 [swapper/9] > 0 0 10 ffffa09e40f91f80 RU 0.0 0 0 [swapper/10] > 0 0 11 ffffa09e40f93f00 RU 0.0 0 0 [swapper/11] > 0 0 12 ffffa09e40f90000 RU 0.0 0 0 [swapper/12] > 0 0 13 ffffa09e40f9bf00 RU 0.0 0 0 [swapper/13] > 0 0 14 ffffa09e40f98000 RU 0.0 0 0 [swapper/14] > 0 0 15 ffffa09e40f9de80 RU 0.0 0 0 [swapper/15] It works well as expected. Some comments are added to explain why we use the hard-coded 16. Link: https://lkml.kernel.org/r/20211120112738.45980-5-laoar.shao@gmail.com Suggested-by: Kees Cook Signed-off-by: Yafang Shao Reviewed-by: David Hildenbrand Cc: Mathieu Desnoyers Cc: Arnaldo Carvalho de Melo Cc: Andrii Nakryiko Cc: Michal Miroslaw Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Matthew Wilcox Cc: David Hildenbrand Cc: Al Viro Cc: Kees Cook Cc: Petr Mladek Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Dennis Dalessandro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elfcore-compat.h | 5 +++++ include/linux/elfcore.h | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h index e272c3d452ce..54feb64e9b5d 100644 --- a/include/linux/elfcore-compat.h +++ b/include/linux/elfcore-compat.h @@ -43,6 +43,11 @@ struct compat_elf_prpsinfo __compat_uid_t pr_uid; __compat_gid_t pr_gid; compat_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; + /* + * The hard-coded 16 is derived from TASK_COMM_LEN, but it can't be + * changed as it is exposed to userspace. We'd better make it hard-coded + * here. + */ char pr_fname[16]; char pr_psargs[ELF_PRARGSZ]; }; diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 957ebec35aad..746e081879a5 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -65,6 +65,11 @@ struct elf_prpsinfo __kernel_gid_t pr_gid; pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; /* Lots missing */ + /* + * The hard-coded 16 is derived from TASK_COMM_LEN, but it can't be + * changed as it is exposed to userspace. We'd better make it hard-coded + * here. + */ char pr_fname[16]; /* filename of executable */ char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -- cgit v1.2.3 From 3087c61ed2c48548b74dd343a5209b87082c682d Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 19 Jan 2022 18:08:40 -0800 Subject: tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN As the sched:sched_switch tracepoint args are derived from the kernel, we'd better make it same with the kernel. So the macro TASK_COMM_LEN is converted to type enum, then all the BPF programs can get it through BTF. The BPF program which wants to use TASK_COMM_LEN should include the header vmlinux.h. Regarding the test_stacktrace_map and test_tracepoint, as the type defined in linux/bpf.h are also defined in vmlinux.h, so we don't need to include linux/bpf.h again. Link: https://lkml.kernel.org/r/20211120112738.45980-8-laoar.shao@gmail.com Signed-off-by: Yafang Shao Acked-by: Andrii Nakryiko Acked-by: David Hildenbrand Cc: Mathieu Desnoyers Cc: Arnaldo Carvalho de Melo Cc: Andrii Nakryiko Cc: Michal Miroslaw Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Matthew Wilcox Cc: David Hildenbrand Cc: Al Viro Cc: Kees Cook Cc: Petr Mladek Cc: Alexei Starovoitov Cc: Dennis Dalessandro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 78c351e35fec..cecd4806edc6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -274,8 +274,13 @@ struct task_group; #define get_current_state() READ_ONCE(current->__state) -/* Task command name length: */ -#define TASK_COMM_LEN 16 +/* + * Define the task command name length as enum, then it can be visible to + * BPF programs. + */ +enum { + TASK_COMM_LEN = 16, +}; extern void scheduler_tick(void); -- cgit v1.2.3 From d6986ce24fc00b0638bd29efe8fb7ba7619ed2aa Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 19 Jan 2022 18:08:43 -0800 Subject: kthread: dynamically allocate memory to store kthread's full name When I was implementing a new per-cpu kthread cfs_migration, I found the comm of it "cfs_migration/%u" is truncated due to the limitation of TASK_COMM_LEN. For example, the comm of the percpu thread on CPU10~19 all have the same name "cfs_migration/1", which will confuse the user. This issue is not critical, because we can get the corresponding CPU from the task's Cpus_allowed. But for kthreads corresponding to other hardware devices, it is not easy to get the detailed device info from task comm, for example, jbd2/nvme0n1p2- xfs-reclaim/sdf Currently there are so many truncated kthreads: rcu_tasks_kthre rcu_tasks_rude_ rcu_tasks_trace poll_mpt3sas0_s ext4-rsv-conver xfs-reclaim/sd{a, b, c, ...} xfs-blockgc/sd{a, b, c, ...} xfs-inodegc/sd{a, b, c, ...} audit_send_repl ecryptfs-kthrea vfio-irqfd-clea jbd2/nvme0n1p2- ... We can shorten these names to work around this problem, but it may be not applied to all of the truncated kthreads. Take 'jbd2/nvme0n1p2-' for example, it is a nice name, and it is not a good idea to shorten it. One possible way to fix this issue is extending the task comm size, but as task->comm is used in lots of places, that may cause some potential buffer overflows. Another more conservative approach is introducing a new pointer to store kthread's full name if it is truncated, which won't introduce too much overhead as it is in the non-critical path. Finally we make a dicision to use the second approach. See also the discussions in this thread: https://lore.kernel.org/lkml/20211101060419.4682-1-laoar.shao@gmail.com/ After this change, the full name of these truncated kthreads will be displayed via /proc/[pid]/comm: rcu_tasks_kthread rcu_tasks_rude_kthread rcu_tasks_trace_kthread poll_mpt3sas0_statu ext4-rsv-conversion xfs-reclaim/sdf1 xfs-blockgc/sdf1 xfs-inodegc/sdf1 audit_send_reply ecryptfs-kthread vfio-irqfd-cleanup jbd2/nvme0n1p2-8 Link: https://lkml.kernel.org/r/20211120112850.46047-1-laoar.shao@gmail.com Signed-off-by: Yafang Shao Reviewed-by: David Hildenbrand Reviewed-by: Petr Mladek Suggested-by: Petr Mladek Suggested-by: Steven Rostedt Cc: Mathieu Desnoyers Cc: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Michal Miroslaw Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Matthew Wilcox Cc: Al Viro Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 346b0f269161..2a5c04494663 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -33,6 +33,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), unsigned int cpu, const char *namefmt); +void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk); void set_kthread_struct(struct task_struct *p); void kthread_set_per_cpu(struct task_struct *k, int cpu); -- cgit v1.2.3 From 0425473037db40d9e322631f2d4dc6ef51f97e88 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 Jan 2022 18:08:56 -0800 Subject: list: introduce list_is_head() helper and re-use it in list.h Introduce list_is_head() in the similar (*) way as it's done for list_entry_is_head(). Make use of it in the list.h. *) it's done as inliner and not a macro to be aligned with other list_is_*() APIs; while at it, make all three to have the same style. Link: https://lkml.kernel.org/r/20211201141824.81400-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Heikki Krogerus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 6636fc07f918..dd6c2041d09c 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -258,8 +258,7 @@ static inline void list_bulk_move_tail(struct list_head *head, * @list: the entry to test * @head: the head of the list */ -static inline int list_is_first(const struct list_head *list, - const struct list_head *head) +static inline int list_is_first(const struct list_head *list, const struct list_head *head) { return list->prev == head; } @@ -269,12 +268,21 @@ static inline int list_is_first(const struct list_head *list, * @list: the entry to test * @head: the head of the list */ -static inline int list_is_last(const struct list_head *list, - const struct list_head *head) +static inline int list_is_last(const struct list_head *list, const struct list_head *head) { return list->next == head; } +/** + * list_is_head - tests whether @list is the list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_head(const struct list_head *list, const struct list_head *head) +{ + return list == head; +} + /** * list_empty - tests whether a list is empty * @head: the list to test. @@ -318,7 +326,7 @@ static inline void list_del_init_careful(struct list_head *entry) static inline int list_empty_careful(const struct list_head *head) { struct list_head *next = smp_load_acquire(&head->next); - return (next == head) && (next == head->prev); + return list_is_head(next, head) && (next == head->prev); } /** @@ -393,10 +401,9 @@ static inline void list_cut_position(struct list_head *list, { if (list_empty(head)) return; - if (list_is_singular(head) && - (head->next != entry && head != entry)) + if (list_is_singular(head) && !list_is_head(entry, head) && (entry != head->next)) return; - if (entry == head) + if (list_is_head(entry, head)) INIT_LIST_HEAD(list); else __list_cut_position(list, head, entry); @@ -570,7 +577,7 @@ static inline void list_splice_tail_init(struct list_head *list, * @head: the head for your list. */ #define list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) + for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) /** * list_for_each_continue - continue iteration over a list @@ -580,7 +587,7 @@ static inline void list_splice_tail_init(struct list_head *list, * Continue to iterate over a list, continuing after the current position. */ #define list_for_each_continue(pos, head) \ - for (pos = pos->next; pos != (head); pos = pos->next) + for (pos = pos->next; !list_is_head(pos, (head)); pos = pos->next) /** * list_for_each_prev - iterate over a list backwards @@ -588,7 +595,7 @@ static inline void list_splice_tail_init(struct list_head *list, * @head: the head for your list. */ #define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; pos != (head); pos = pos->prev) + for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev) /** * list_for_each_safe - iterate over a list safe against removal of list entry @@ -597,8 +604,9 @@ static inline void list_splice_tail_init(struct list_head *list, * @head: the head for your list. */ #define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) + for (pos = (head)->next, n = pos->next; \ + !list_is_head(pos, (head)); \ + pos = n, n = pos->next) /** * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry @@ -608,7 +616,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_prev_safe(pos, n, head) \ for (pos = (head)->prev, n = pos->prev; \ - pos != (head); \ + !list_is_head(pos, (head)); \ pos = n, n = pos->prev) /** -- cgit v1.2.3 From fd0a1462405b087377e59b84e119fe7e2d08499a Mon Sep 17 00:00:00 2001 From: Isabella Basso Date: Wed, 19 Jan 2022 18:09:02 -0800 Subject: hash.h: remove unused define directive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "test_hash.c: refactor into KUnit", v3. We refactored the lib/test_hash.c file into KUnit as part of the student group LKCAMP [1] introductory hackathon for kernel development. This test was pointed to our group by Daniel Latypov [2], so its full conversion into a pure KUnit test was our goal in this patch series, but we ran into many problems relating to it not being split as unit tests, which complicated matters a bit, as the reasoning behind the original tests is quite cryptic for those unfamiliar with hash implementations. Some interesting developments we'd like to highlight are: - In patch 1/5 we noticed that there was an unused define directive that could be removed. - In patch 4/5 we noticed how stringhash and hash tests are all under the lib/test_hash.c file, which might cause some confusion, and we also broke those kernel config entries up. Overall KUnit developments have been made in the other patches in this series: In patches 2/5, 3/5 and 5/5 we refactored the lib/test_hash.c file so as to make it more compatible with the KUnit style, whilst preserving the original idea of the maintainer who designed it (i.e. George Spelvin), which might be undesirable for unit tests, but we assume it is enough for a first patch. This patch (of 5): Currently, there exist hash_32() and __hash_32() functions, which were introduced in a patch [1] targeting architecture specific optimizations. These functions can be overridden on a per-architecture basis to achieve such optimizations. They must set their corresponding define directive (HAVE_ARCH_HASH_32 and HAVE_ARCH__HASH_32, respectively) so that header files can deal with these overrides properly. As the supported 32-bit architectures that have their own hash function implementation (i.e. m68k, Microblaze, H8/300, pa-risc) have only been making use of the (more general) __hash_32() function (which only lacks a right shift operation when compared to the hash_32() function), remove the define directive corresponding to the arch-specific hash_32() implementation. [1] https://lore.kernel.org/lkml/20160525073311.5600.qmail@ns.sciencehorizons.net/ [akpm@linux-foundation.org: hash_32_generic() becomes hash_32()] Link: https://lkml.kernel.org/r/20211208183711.390454-1-isabbasso@riseup.net Link: https://lkml.kernel.org/r/20211208183711.390454-2-isabbasso@riseup.net Reviewed-by: David Gow Tested-by: David Gow Co-developed-by: Augusto Durães Camargo Signed-off-by: Augusto Durães Camargo Co-developed-by: Enzo Ferreira Signed-off-by: Enzo Ferreira Signed-off-by: Isabella Basso Cc: Geert Uytterhoeven Cc: Brendan Higgins Cc: Daniel Latypov Cc: Shuah Khan Cc: Rodrigo Siqueira Cc: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hash.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hash.h b/include/linux/hash.h index ad6fa21d977b..38edaa08f862 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val) return val * GOLDEN_RATIO_32; } -#ifndef HAVE_ARCH_HASH_32 -#define hash_32 hash_32_generic -#endif -static inline u32 hash_32_generic(u32 val, unsigned int bits) +static inline u32 hash_32(u32 val, unsigned int bits) { /* High bits are more random, so use them. */ return __hash_32(val) >> (32 - bits); -- cgit v1.2.3 From 60c7801b121aa0e90d8aae7245859aec0ce2306f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 Jan 2022 18:09:19 -0800 Subject: kunit: replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Link: https://lkml.kernel.org/r/20211213204441.56204-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/kunit/assert.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index ad889b539ab3..ccbc36c0b02f 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -10,7 +10,7 @@ #define _KUNIT_ASSERT_H #include -#include +#include struct kunit; struct string_stream; -- cgit v1.2.3 From 8e930a66993be0a5f9a97c7c1c76ef09db4ef8bb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 Jan 2022 18:09:22 -0800 Subject: uuid: discourage people from using UAPI header in new code Discourage people from using UAPI header in new code by adding a note. Link: https://lkml.kernel.org/r/20211216113552.81199-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/uuid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h index e5a7eecef7c3..32615dc5f0cf 100644 --- a/include/uapi/linux/uuid.h +++ b/include/uapi/linux/uuid.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* DO NOT USE in new code! This is solely for MEI due to legacy reasons */ /* * UUID/GUID definition * -- cgit v1.2.3 From c7e4289cbe668c2743ac0fd623a2518dbc191dc0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 Jan 2022 18:09:25 -0800 Subject: uuid: remove licence boilerplate text from the header Remove licence boilerplate text from the UAPI header. Link: https://lkml.kernel.org/r/20211216113552.81199-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/uuid.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h index 32615dc5f0cf..c0f4bd9b040e 100644 --- a/include/uapi/linux/uuid.h +++ b/include/uapi/linux/uuid.h @@ -5,15 +5,6 @@ * * Copyright (C) 2010, Intel Corp. * Huang Ying - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation; - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef _UAPI_LINUX_UUID_H_ -- cgit v1.2.3 From 23b36fec7e14f8cf1c17e832e53dd4761e0dfe83 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 19 Jan 2022 18:09:56 -0800 Subject: panic: use error_report_end tracepoint on warnings Introduce the error detector "warning" to the error_report event and use the error_report_end tracepoint at the end of a warning report. This allows in-kernel tests but also userspace to more easily determine if a warning occurred without polling kernel logs. [akpm@linux-foundation.org: add comma to enum list, per Andy] Link: https://lkml.kernel.org/r/20211115085630.1756817-1-elver@google.com Signed-off-by: Marco Elver Cc: Steven Rostedt Cc: Ingo Molnar Cc: Alexander Potapenko Cc: Petr Mladek Cc: Luis Chamberlain Cc: Wei Liu Cc: Mike Rapoport Cc: Arnd Bergmann Cc: John Ogness Cc: Andy Shevchenko Cc: Alexander Popov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/error_report.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/error_report.h b/include/trace/events/error_report.h index 96f64bf218b2..a1922a800e6f 100644 --- a/include/trace/events/error_report.h +++ b/include/trace/events/error_report.h @@ -17,14 +17,16 @@ enum error_detector { ERROR_DETECTOR_KFENCE, - ERROR_DETECTOR_KASAN + ERROR_DETECTOR_KASAN, + ERROR_DETECTOR_WARN, }; #endif /* __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY */ -#define error_detector_list \ +#define error_detector_list \ EM(ERROR_DETECTOR_KFENCE, "kfence") \ - EMe(ERROR_DETECTOR_KASAN, "kasan") + EM(ERROR_DETECTOR_KASAN, "kasan") \ + EMe(ERROR_DETECTOR_WARN, "warning") /* Always end the list with an EMe. */ #undef EM -- cgit v1.2.3 From a3d5dc908a5f572ce3e31fe83fd2459a1c3c5422 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 19 Jan 2022 18:10:02 -0800 Subject: delayacct: support swapin delay accounting for swapping without blkio Currently delayacct accounts swapin delay only for swapping that cause blkio. If we use zram for swapping, tools/accounting/getdelays can't get any SWAP delay. It's useful to get zram swapin delay information, for example to adjust compress algorithm or /proc/sys/vm/swappiness. Reference to PSI, it accounts any kind of swapping by doing its work in swap_readpage(), no matter whether swapping causes blkio. Let delayacct do the similar work. Link: https://lkml.kernel.org/r/20211112083813.8559-1-yang.yang29@zte.com.cn Signed-off-by: Yang Yang Reported-by: Zeal Robot Cc: Balbir Singh Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index af7e6eb50283..b96d68f310a2 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -9,14 +9,6 @@ #include -/* - * Per-task flags relevant to delay accounting - * maintained privately to avoid exhausting similar flags in sched.h:PF_* - * Used to set current->delays->flags - */ -#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ -#define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ - #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { raw_spinlock_t lock; @@ -37,13 +29,13 @@ struct task_delay_info { * associated with the operation is added to XXX_delay. * XXX_delay contains the accumulated delay time in nanoseconds. */ - u64 blkio_start; /* Shared by blkio, swapin */ + u64 blkio_start; u64 blkio_delay; /* wait for sync block io completion */ - u64 swapin_delay; /* wait for swapin block io completion */ + u64 swapin_start; + u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ - u32 swapin_count; /* total count of the number of swapin block */ - /* io operations performed */ + u32 swapin_count; /* total count of swapin */ u64 freepages_start; u64 freepages_delay; /* wait for memory reclaim */ @@ -79,14 +71,8 @@ extern void __delayacct_freepages_start(void); extern void __delayacct_freepages_end(void); extern void __delayacct_thrashing_start(void); extern void __delayacct_thrashing_end(void); - -static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) -{ - if (p->delays) - return (p->delays->flags & DELAYACCT_PF_BLKIO); - else - return 0; -} +extern void __delayacct_swapin_start(void); +extern void __delayacct_swapin_end(void); static inline void delayacct_set_flag(struct task_struct *p, int flag) { @@ -123,7 +109,6 @@ static inline void delayacct_blkio_start(void) if (!static_branch_unlikely(&delayacct_key)) return; - delayacct_set_flag(current, DELAYACCT_PF_BLKIO); if (current->delays) __delayacct_blkio_start(); } @@ -135,7 +120,6 @@ static inline void delayacct_blkio_end(struct task_struct *p) if (p->delays) __delayacct_blkio_end(p); - delayacct_clear_flag(p, DELAYACCT_PF_BLKIO); } static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) @@ -169,6 +153,18 @@ static inline void delayacct_thrashing_end(void) __delayacct_thrashing_end(); } +static inline void delayacct_swapin_start(void) +{ + if (current->delays) + __delayacct_swapin_start(); +} + +static inline void delayacct_swapin_end(void) +{ + if (current->delays) + __delayacct_swapin_end(); +} + #else static inline void delayacct_set_flag(struct task_struct *p, int flag) {} @@ -199,6 +195,10 @@ static inline void delayacct_thrashing_start(void) {} static inline void delayacct_thrashing_end(void) {} +static inline void delayacct_swapin_start(void) +{} +static inline void delayacct_swapin_end(void) +{} #endif /* CONFIG_TASK_DELAY_ACCT */ -- cgit v1.2.3 From 82065b7266899fbdce4c7394d7dd02688161f0cf Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 19 Jan 2022 18:10:06 -0800 Subject: delayacct: fix incomplete disable operation when switch enable to disable When a task is created after delayacct is enabled, kernel will do all the delay accountings for that task. The problems is if user disables delayacct by set /proc/sys/kernel/task_delayacct to zero, only blkio delay accounting is disabled. Now disable all the kinds of delay accountings when /proc/sys/kernel/task_delayacct sets to zero. Link: https://lkml.kernel.org/r/20211123140342.32962-1-ran.xiaokai@zte.com.cn Signed-off-by: Yang Yang Reported-by: Zeal Robot Cc: Balbir Singh Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index b96d68f310a2..c675cfb6437e 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -131,36 +131,54 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) static inline void delayacct_freepages_start(void) { + if (!static_branch_unlikely(&delayacct_key)) + return; + if (current->delays) __delayacct_freepages_start(); } static inline void delayacct_freepages_end(void) { + if (!static_branch_unlikely(&delayacct_key)) + return; + if (current->delays) __delayacct_freepages_end(); } static inline void delayacct_thrashing_start(void) { + if (!static_branch_unlikely(&delayacct_key)) + return; + if (current->delays) __delayacct_thrashing_start(); } static inline void delayacct_thrashing_end(void) { + if (!static_branch_unlikely(&delayacct_key)) + return; + if (current->delays) __delayacct_thrashing_end(); } static inline void delayacct_swapin_start(void) { + if (!static_branch_unlikely(&delayacct_key)) + return; + if (current->delays) __delayacct_swapin_start(); } static inline void delayacct_swapin_end(void) { + if (!static_branch_unlikely(&delayacct_key)) + return; + if (current->delays) __delayacct_swapin_end(); } -- cgit v1.2.3 From 1193829da1a6728249cd02577a020bd64fd9c160 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 19 Jan 2022 18:10:09 -0800 Subject: delayacct: cleanup flags in struct task_delay_info and functions use it Flags in struct task_delay_info is used to distinguish the difference between swapin and blkio delay acountings. But after patch "delayacct: support swapin delay accounting for swapping without blkio", there is no need to do that since swapin and blkio delay accounting use their own functions. Link: https://lkml.kernel.org/r/20211124065958.36703-1-yang.yang29@zte.com.cn Signed-off-by: Yang Yang Cc: Balbir Singh Cc: Ingo Molnar Cc: Johannes Weiner Cc: Peter Zijlstra Cc: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index c675cfb6437e..435c3654a0ff 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -12,7 +12,6 @@ #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { raw_spinlock_t lock; - unsigned int flags; /* Private per-task flags */ /* For each stat XXX, add following, aligned appropriately * @@ -74,18 +73,6 @@ extern void __delayacct_thrashing_end(void); extern void __delayacct_swapin_start(void); extern void __delayacct_swapin_end(void); -static inline void delayacct_set_flag(struct task_struct *p, int flag) -{ - if (p->delays) - p->delays->flags |= flag; -} - -static inline void delayacct_clear_flag(struct task_struct *p, int flag) -{ - if (p->delays) - p->delays->flags &= ~flag; -} - static inline void delayacct_tsk_init(struct task_struct *tsk) { /* reinitialize in case parent's non-null pointer was dup'ed*/ @@ -184,10 +171,6 @@ static inline void delayacct_swapin_end(void) } #else -static inline void delayacct_set_flag(struct task_struct *p, int flag) -{} -static inline void delayacct_clear_flag(struct task_struct *p, int flag) -{} static inline void delayacct_init(void) {} static inline void delayacct_tsk_init(struct task_struct *tsk) -- cgit v1.2.3 From 5bf18281534451bf1ad56a45a3085cd7ad46860d Mon Sep 17 00:00:00 2001 From: wangyong Date: Wed, 19 Jan 2022 18:10:15 -0800 Subject: delayacct: track delays from memory compact Delay accounting does not track the delay of memory compact. When there is not enough free memory, tasks can spend a amount of their time waiting for compact. To get the impact of tasks in direct memory compact, measure the delay when allocating memory through memory compact. Also update tools/accounting/getdelays.c: / # ./getdelays_next -di -p 304 print delayacct stats ON printing IO accounting PID 304 CPU count real total virtual total delay total delay average 277 780000000 849039485 18877296 0.068ms IO count delay total delay average 0 0 0ms SWAP count delay total delay average 0 0 0ms RECLAIM count delay total delay average 5 11088812685 2217ms THRASHING count delay total delay average 0 0 0ms COMPACT count delay total delay average 3 72758 0ms watch: read=0, write=0, cancelled_write=0 Link: https://lkml.kernel.org/r/1638619795-71451-1-git-send-email-wang.yong12@zte.com.cn Signed-off-by: wangyong Reviewed-by: Jiang Xuexin Reviewed-by: Zhang Wenya Reviewed-by: Yang Yang Reviewed-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 28 ++++++++++++++++++++++++++++ include/uapi/linux/taskstats.h | 6 +++++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 435c3654a0ff..3e03d010bd2e 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -42,8 +42,12 @@ struct task_delay_info { u64 thrashing_start; u64 thrashing_delay; /* wait for thrashing page */ + u64 compact_start; + u64 compact_delay; /* wait for memory compact */ + u32 freepages_count; /* total count of memory reclaim */ u32 thrashing_count; /* total count of thrash waits */ + u32 compact_count; /* total count of memory compact */ }; #endif @@ -72,6 +76,8 @@ extern void __delayacct_thrashing_start(void); extern void __delayacct_thrashing_end(void); extern void __delayacct_swapin_start(void); extern void __delayacct_swapin_end(void); +extern void __delayacct_compact_start(void); +extern void __delayacct_compact_end(void); static inline void delayacct_tsk_init(struct task_struct *tsk) { @@ -170,6 +176,24 @@ static inline void delayacct_swapin_end(void) __delayacct_swapin_end(); } +static inline void delayacct_compact_start(void) +{ + if (!static_branch_unlikely(&delayacct_key)) + return; + + if (current->delays) + __delayacct_compact_start(); +} + +static inline void delayacct_compact_end(void) +{ + if (!static_branch_unlikely(&delayacct_key)) + return; + + if (current->delays) + __delayacct_compact_end(); +} + #else static inline void delayacct_init(void) {} @@ -200,6 +224,10 @@ static inline void delayacct_swapin_start(void) {} static inline void delayacct_swapin_end(void) {} +static inline void delayacct_compact_start(void) +{} +static inline void delayacct_compact_end(void) +{} #endif /* CONFIG_TASK_DELAY_ACCT */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index ccbd08709321..12327d32378f 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 10 +#define TASKSTATS_VERSION 11 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -172,6 +172,10 @@ struct taskstats { /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ + + /* Delay waiting for memory compact */ + __u64 compact_count; + __u64 compact_delay_total; }; -- cgit v1.2.3 From 440323b6cf5b9896013a78c4f578823e8243a7fd Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 14 Jan 2022 18:58:57 +0800 Subject: asm-generic: Add missing brackets for io_stop_wc macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After using io_stop_wc(), drivers reports following compile error when compiled on X86. drivers/net/ethernet/hisilicon/hns3/hns3_enet.c: In function ‘hns3_tx_push_bd’: drivers/net/ethernet/hisilicon/hns3/hns3_enet.c:2058:12: error: expected ‘;’ before ‘(’ token io_stop_wc(); ^ It is because I missed to add the brackets after io_stop_wc macro. So let's add the missing brackets. Fixes: d5624bb29f49 ("asm-generic: introduce io_stop_wc() and add implementation for ARM64") Reported-by: Guangbin Huang Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20220114105857.126300-1-wangxiongfeng2@huawei.com Signed-off-by: Catalin Marinas --- include/asm-generic/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 4c2c1b830344..3385ca5ca5c0 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -259,7 +259,7 @@ do { \ * write-combining memory accesses before this macro with those after it. */ #ifndef io_stop_wc -#define io_stop_wc do { } while (0) +#define io_stop_wc() do { } while (0) #endif #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 66a8f7f04979f4ad739085f01d99c8caf620b4f5 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 18 Jan 2022 18:35:02 +0100 Subject: of: base: make small of_parse_phandle() variants static inline Make all the smaller variants of the of_parse_phandle() static inline. This also let us remove the empty function stubs if CONFIG_OF is not defined. Suggested-by: Rob Herring Signed-off-by: Michael Walle [robh: move index < 0 check into __of_parse_phandle_with_args] Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220118173504.2867523-2-michael@walle.cc --- include/linux/of.h | 148 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 120 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index ff143a027abc..16d76c92fbe0 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -364,18 +364,12 @@ extern const struct of_device_id *of_match_node( const struct of_device_id *matches, const struct device_node *node); extern int of_modalias_node(struct device_node *node, char *modalias, int len); extern void of_print_phandle_args(const char *msg, const struct of_phandle_args *args); -extern struct device_node *of_parse_phandle(const struct device_node *np, - const char *phandle_name, - int index); -extern int of_parse_phandle_with_args(const struct device_node *np, - const char *list_name, const char *cells_name, int index, - struct of_phandle_args *out_args); +extern int __of_parse_phandle_with_args(const struct device_node *np, + const char *list_name, const char *cells_name, int cell_count, + int index, struct of_phandle_args *out_args); extern int of_parse_phandle_with_args_map(const struct device_node *np, const char *list_name, const char *stem_name, int index, struct of_phandle_args *out_args); -extern int of_parse_phandle_with_fixed_args(const struct device_node *np, - const char *list_name, int cells_count, int index, - struct of_phandle_args *out_args); extern int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name); @@ -865,18 +859,12 @@ static inline int of_property_read_string_helper(const struct device_node *np, return -ENOSYS; } -static inline struct device_node *of_parse_phandle(const struct device_node *np, - const char *phandle_name, - int index) -{ - return NULL; -} - -static inline int of_parse_phandle_with_args(const struct device_node *np, - const char *list_name, - const char *cells_name, - int index, - struct of_phandle_args *out_args) +static inline int __of_parse_phandle_with_args(const struct device_node *np, + const char *list_name, + const char *cells_name, + int cell_count, + int index, + struct of_phandle_args *out_args) { return -ENOSYS; } @@ -890,13 +878,6 @@ static inline int of_parse_phandle_with_args_map(const struct device_node *np, return -ENOSYS; } -static inline int of_parse_phandle_with_fixed_args(const struct device_node *np, - const char *list_name, int cells_count, int index, - struct of_phandle_args *out_args) -{ - return -ENOSYS; -} - static inline int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name) @@ -1077,6 +1058,117 @@ static inline bool of_node_is_type(const struct device_node *np, const char *typ return np && match && type && !strcmp(match, type); } +/** + * of_parse_phandle - Resolve a phandle property to a device_node pointer + * @np: Pointer to device node holding phandle property + * @phandle_name: Name of property holding a phandle value + * @index: For properties holding a table of phandles, this is the index into + * the table + * + * Return: The device_node pointer with refcount incremented. Use + * of_node_put() on it when done. + */ +static inline struct device_node *of_parse_phandle(const struct device_node *np, + const char *phandle_name, + int index) +{ + struct of_phandle_args args; + + if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0, + index, &args)) + return NULL; + + return args.np; +} + +/** + * of_parse_phandle_with_args() - Find a node pointed by phandle in a list + * @np: pointer to a device tree node containing a list + * @list_name: property name that contains a list + * @cells_name: property name that specifies phandles' arguments count + * @index: index of a phandle to parse out + * @out_args: optional pointer to output arguments structure (will be filled) + * + * This function is useful to parse lists of phandles and their arguments. + * Returns 0 on success and fills out_args, on error returns appropriate + * errno value. + * + * Caller is responsible to call of_node_put() on the returned out_args->np + * pointer. + * + * Example:: + * + * phandle1: node1 { + * #list-cells = <2>; + * }; + * + * phandle2: node2 { + * #list-cells = <1>; + * }; + * + * node3 { + * list = <&phandle1 1 2 &phandle2 3>; + * }; + * + * To get a device_node of the ``node2`` node you may call this: + * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args); + */ +static inline int of_parse_phandle_with_args(const struct device_node *np, + const char *list_name, + const char *cells_name, + int index, + struct of_phandle_args *out_args) +{ + int cell_count = -1; + + /* If cells_name is NULL we assume a cell count of 0 */ + if (!cells_name) + cell_count = 0; + + return __of_parse_phandle_with_args(np, list_name, cells_name, + cell_count, index, out_args); +} + +/** + * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list + * @np: pointer to a device tree node containing a list + * @list_name: property name that contains a list + * @cell_count: number of argument cells following the phandle + * @index: index of a phandle to parse out + * @out_args: optional pointer to output arguments structure (will be filled) + * + * This function is useful to parse lists of phandles and their arguments. + * Returns 0 on success and fills out_args, on error returns appropriate + * errno value. + * + * Caller is responsible to call of_node_put() on the returned out_args->np + * pointer. + * + * Example:: + * + * phandle1: node1 { + * }; + * + * phandle2: node2 { + * }; + * + * node3 { + * list = <&phandle1 0 2 &phandle2 2 3>; + * }; + * + * To get a device_node of the ``node2`` node you may call this: + * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args); + */ +static inline int of_parse_phandle_with_fixed_args(const struct device_node *np, + const char *list_name, + int cell_count, + int index, + struct of_phandle_args *out_args) +{ + return __of_parse_phandle_with_args(np, list_name, NULL, cell_count, + index, out_args); +} + /** * of_property_count_u8_elems - Count the number of u8 elements in a property * -- cgit v1.2.3 From 2ca42c3ad9ed875b136065b010753a4caaaa1d38 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 18 Jan 2022 18:35:03 +0100 Subject: of: property: define of_property_read_u{8,16,32,64}_array() unconditionally We can get rid of all the empty stubs because all these functions call of_property_read_variable_u{8,16,32,64}_array() which already have an empty stub if CONFIG_OF is not defined. Signed-off-by: Michael Walle Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220118173504.2867523-3-michael@walle.cc --- include/linux/of.h | 274 ++++++++++++++++++++++++----------------------------- 1 file changed, 124 insertions(+), 150 deletions(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index 16d76c92fbe0..2dc77430a91a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -410,130 +410,6 @@ extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) -/** - * of_property_read_u8_array - Find and read an array of u8 from a property. - * - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @out_values: pointer to return value, modified only if return value is 0. - * @sz: number of array elements to read - * - * Search for a property in a device node and read 8-bit value(s) from - * it. - * - * dts entry of array should be like: - * ``property = /bits/ 8 <0x50 0x60 0x70>;`` - * - * Return: 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. - * - * The out_values is modified only if a valid u8 value can be decoded. - */ -static inline int of_property_read_u8_array(const struct device_node *np, - const char *propname, - u8 *out_values, size_t sz) -{ - int ret = of_property_read_variable_u8_array(np, propname, out_values, - sz, 0); - if (ret >= 0) - return 0; - else - return ret; -} - -/** - * of_property_read_u16_array - Find and read an array of u16 from a property. - * - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @out_values: pointer to return value, modified only if return value is 0. - * @sz: number of array elements to read - * - * Search for a property in a device node and read 16-bit value(s) from - * it. - * - * dts entry of array should be like: - * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` - * - * Return: 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. - * - * The out_values is modified only if a valid u16 value can be decoded. - */ -static inline int of_property_read_u16_array(const struct device_node *np, - const char *propname, - u16 *out_values, size_t sz) -{ - int ret = of_property_read_variable_u16_array(np, propname, out_values, - sz, 0); - if (ret >= 0) - return 0; - else - return ret; -} - -/** - * of_property_read_u32_array - Find and read an array of 32 bit integers - * from a property. - * - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @out_values: pointer to return value, modified only if return value is 0. - * @sz: number of array elements to read - * - * Search for a property in a device node and read 32-bit value(s) from - * it. - * - * Return: 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. - * - * The out_values is modified only if a valid u32 value can be decoded. - */ -static inline int of_property_read_u32_array(const struct device_node *np, - const char *propname, - u32 *out_values, size_t sz) -{ - int ret = of_property_read_variable_u32_array(np, propname, out_values, - sz, 0); - if (ret >= 0) - return 0; - else - return ret; -} - -/** - * of_property_read_u64_array - Find and read an array of 64 bit integers - * from a property. - * - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @out_values: pointer to return value, modified only if return value is 0. - * @sz: number of array elements to read - * - * Search for a property in a device node and read 64-bit value(s) from - * it. - * - * Return: 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. - * - * The out_values is modified only if a valid u64 value can be decoded. - */ -static inline int of_property_read_u64_array(const struct device_node *np, - const char *propname, - u64 *out_values, size_t sz) -{ - int ret = of_property_read_variable_u64_array(np, propname, out_values, - sz, 0); - if (ret >= 0) - return 0; - else - return ret; -} - /* * struct property *prop; * const __be32 *p; @@ -728,32 +604,6 @@ static inline int of_property_count_elems_of_size(const struct device_node *np, return -ENOSYS; } -static inline int of_property_read_u8_array(const struct device_node *np, - const char *propname, u8 *out_values, size_t sz) -{ - return -ENOSYS; -} - -static inline int of_property_read_u16_array(const struct device_node *np, - const char *propname, u16 *out_values, size_t sz) -{ - return -ENOSYS; -} - -static inline int of_property_read_u32_array(const struct device_node *np, - const char *propname, - u32 *out_values, size_t sz) -{ - return -ENOSYS; -} - -static inline int of_property_read_u64_array(const struct device_node *np, - const char *propname, - u64 *out_values, size_t sz) -{ - return -ENOSYS; -} - static inline int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { @@ -1328,6 +1178,130 @@ static inline bool of_property_read_bool(const struct device_node *np, return prop ? true : false; } +/** + * of_property_read_u8_array - Find and read an array of u8 from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 8-bit value(s) from + * it. + * + * dts entry of array should be like: + * ``property = /bits/ 8 <0x50 0x60 0x70>;`` + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u8 value can be decoded. + */ +static inline int of_property_read_u8_array(const struct device_node *np, + const char *propname, + u8 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u8_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + +/** + * of_property_read_u16_array - Find and read an array of u16 from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 16-bit value(s) from + * it. + * + * dts entry of array should be like: + * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u16 value can be decoded. + */ +static inline int of_property_read_u16_array(const struct device_node *np, + const char *propname, + u16 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u16_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + +/** + * of_property_read_u32_array - Find and read an array of 32 bit integers + * from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 32-bit value(s) from + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u32 value can be decoded. + */ +static inline int of_property_read_u32_array(const struct device_node *np, + const char *propname, + u32 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u32_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + +/** + * of_property_read_u64_array - Find and read an array of 64 bit integers + * from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 64-bit value(s) from + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u64 value can be decoded. + */ +static inline int of_property_read_u64_array(const struct device_node *np, + const char *propname, + u64 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u64_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + static inline int of_property_read_u8(const struct device_node *np, const char *propname, u8 *out_value) -- cgit v1.2.3 From 8c39b8bc82aafcc8dd378bd79c76fac8e8a89c8d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2022 11:44:54 +0000 Subject: cachefiles: Make some tracepoint adjustments Make some adjustments to tracepoints to make the tracing a bit more followable: (1) Standardise on displaying the backing inode number as "B=" with no leading zeros. (2) Make the cachefiles_lookup tracepoint log the directory inode number as well as the looked-up inode number. (3) Add a cachefiles_lookup tracepoint into cachefiles_get_directory() to log directory lookup. (4) Add a new cachefiles_mkdir tracepoint and use that to log a successful mkdir from cachefiles_get_directory(). (5) Make the cachefiles_unlink and cachefiles_rename tracepoints log the inode number of the affected file/dir rather than dentry struct pointers. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164251403694.3435901.9797725381831316715.stgit@warthog.procyon.org.uk/ # v1 --- include/trace/events/cachefiles.h | 82 ++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 1172529b5b49..093c4acb7a3a 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -233,25 +233,48 @@ TRACE_EVENT(cachefiles_ref, TRACE_EVENT(cachefiles_lookup, TP_PROTO(struct cachefiles_object *obj, + struct dentry *dir, struct dentry *de), - TP_ARGS(obj, de), + TP_ARGS(obj, dir, de), TP_STRUCT__entry( __field(unsigned int, obj ) __field(short, error ) + __field(unsigned long, dino ) __field(unsigned long, ino ) ), TP_fast_assign( - __entry->obj = obj->debug_id; + __entry->obj = obj ? obj->debug_id : 0; + __entry->dino = d_backing_inode(dir)->i_ino; __entry->ino = (!IS_ERR(de) && d_backing_inode(de) ? d_backing_inode(de)->i_ino : 0); __entry->error = IS_ERR(de) ? PTR_ERR(de) : 0; ), - TP_printk("o=%08x i=%lx e=%d", - __entry->obj, __entry->ino, __entry->error) + TP_printk("o=%08x dB=%lx B=%lx e=%d", + __entry->obj, __entry->dino, __entry->ino, __entry->error) + ); + +TRACE_EVENT(cachefiles_mkdir, + TP_PROTO(struct dentry *dir, struct dentry *subdir), + + TP_ARGS(dir, subdir), + + TP_STRUCT__entry( + __field(unsigned int, dir ) + __field(unsigned int, subdir ) + ), + + TP_fast_assign( + __entry->dir = d_backing_inode(dir)->i_ino; + __entry->subdir = d_backing_inode(subdir)->i_ino; + ), + + TP_printk("dB=%x sB=%x", + __entry->dir, + __entry->subdir) ); TRACE_EVENT(cachefiles_tmpfile, @@ -269,7 +292,7 @@ TRACE_EVENT(cachefiles_tmpfile, __entry->backer = backer->i_ino; ), - TP_printk("o=%08x b=%08x", + TP_printk("o=%08x B=%x", __entry->obj, __entry->backer) ); @@ -289,61 +312,58 @@ TRACE_EVENT(cachefiles_link, __entry->backer = backer->i_ino; ), - TP_printk("o=%08x b=%08x", + TP_printk("o=%08x B=%x", __entry->obj, __entry->backer) ); TRACE_EVENT(cachefiles_unlink, TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, + ino_t ino, enum fscache_why_object_killed why), - TP_ARGS(obj, de, why), + TP_ARGS(obj, ino, why), /* Note that obj may be NULL */ TP_STRUCT__entry( __field(unsigned int, obj ) - __field(struct dentry *, de ) + __field(unsigned int, ino ) __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( __entry->obj = obj ? obj->debug_id : UINT_MAX; - __entry->de = de; + __entry->ino = ino; __entry->why = why; ), - TP_printk("o=%08x d=%p w=%s", - __entry->obj, __entry->de, + TP_printk("o=%08x B=%x w=%s", + __entry->obj, __entry->ino, __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); TRACE_EVENT(cachefiles_rename, TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, - struct dentry *to, + ino_t ino, enum fscache_why_object_killed why), - TP_ARGS(obj, de, to, why), + TP_ARGS(obj, ino, why), /* Note that obj may be NULL */ TP_STRUCT__entry( __field(unsigned int, obj ) - __field(struct dentry *, de ) - __field(struct dentry *, to ) + __field(unsigned int, ino ) __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( __entry->obj = obj ? obj->debug_id : UINT_MAX; - __entry->de = de; - __entry->to = to; + __entry->ino = ino; __entry->why = why; ), - TP_printk("o=%08x d=%p t=%p w=%s", - __entry->obj, __entry->de, __entry->to, + TP_printk("o=%08x B=%x w=%s", + __entry->obj, __entry->ino, __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); @@ -370,7 +390,7 @@ TRACE_EVENT(cachefiles_coherency, __entry->ino = ino; ), - TP_printk("o=%08x %s i=%llx c=%u", + TP_printk("o=%08x %s B=%llx c=%u", __entry->obj, __print_symbolic(__entry->why, cachefiles_coherency_traces), __entry->ino, @@ -397,7 +417,7 @@ TRACE_EVENT(cachefiles_vol_coherency, __entry->ino = ino; ), - TP_printk("V=%08x %s i=%llx", + TP_printk("V=%08x %s B=%llx", __entry->vol, __print_symbolic(__entry->why, cachefiles_coherency_traces), __entry->ino) @@ -435,7 +455,7 @@ TRACE_EVENT(cachefiles_prep_read, __entry->cache_inode = cache_inode; ), - TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x b=%x", + TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x B=%x", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __print_symbolic(__entry->why, cachefiles_prepare_read_traces), @@ -466,7 +486,7 @@ TRACE_EVENT(cachefiles_read, __entry->len = len; ), - TP_printk("o=%08x b=%08x s=%llx l=%zx", + TP_printk("o=%08x B=%x s=%llx l=%zx", __entry->obj, __entry->backer, __entry->start, @@ -495,7 +515,7 @@ TRACE_EVENT(cachefiles_write, __entry->len = len; ), - TP_printk("o=%08x b=%08x s=%llx l=%zx", + TP_printk("o=%08x B=%x s=%llx l=%zx", __entry->obj, __entry->backer, __entry->start, @@ -524,7 +544,7 @@ TRACE_EVENT(cachefiles_trunc, __entry->why = why; ), - TP_printk("o=%08x b=%08x %s l=%llx->%llx", + TP_printk("o=%08x B=%x %s l=%llx->%llx", __entry->obj, __entry->backer, __print_symbolic(__entry->why, cachefiles_trunc_traces), @@ -549,7 +569,7 @@ TRACE_EVENT(cachefiles_mark_active, __entry->inode = inode->i_ino; ), - TP_printk("o=%08x i=%lx", + TP_printk("o=%08x B=%lx", __entry->obj, __entry->inode) ); @@ -570,7 +590,7 @@ TRACE_EVENT(cachefiles_mark_inactive, __entry->inode = inode->i_ino; ), - TP_printk("o=%08x i=%lx", + TP_printk("o=%08x B=%lx", __entry->obj, __entry->inode) ); @@ -594,7 +614,7 @@ TRACE_EVENT(cachefiles_vfs_error, __entry->where = where; ), - TP_printk("o=%08x b=%08x %s e=%d", + TP_printk("o=%08x B=%x %s e=%d", __entry->obj, __entry->backer, __print_symbolic(__entry->where, cachefiles_error_traces), @@ -621,7 +641,7 @@ TRACE_EVENT(cachefiles_io_error, __entry->where = where; ), - TP_printk("o=%08x b=%08x %s e=%d", + TP_printk("o=%08x B=%x %s e=%d", __entry->obj, __entry->backer, __print_symbolic(__entry->where, cachefiles_error_traces), -- cgit v1.2.3 From b64a3314989df8e44c114f377808407f36dbf4f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Jan 2022 11:05:13 +0000 Subject: cachefiles: Trace active-mark failure Add a tracepoint to log failure to apply an active mark to a file in addition to tracing successfully setting and unsetting the mark. Also include the backing file inode number in the message logged to dmesg. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/164251404666.3435901.17331742792401482190.stgit@warthog.procyon.org.uk/ # v1 --- include/trace/events/cachefiles.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 093c4acb7a3a..c6f5aa74db89 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -573,6 +573,27 @@ TRACE_EVENT(cachefiles_mark_active, __entry->obj, __entry->inode) ); +TRACE_EVENT(cachefiles_mark_failed, + TP_PROTO(struct cachefiles_object *obj, + struct inode *inode), + + TP_ARGS(obj, inode), + + /* Note that obj may be NULL */ + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(ino_t, inode ) + ), + + TP_fast_assign( + __entry->obj = obj ? obj->debug_id : 0; + __entry->inode = inode->i_ino; + ), + + TP_printk("o=%08x B=%lx", + __entry->obj, __entry->inode) + ); + TRACE_EVENT(cachefiles_mark_inactive, TP_PROTO(struct cachefiles_object *obj, struct inode *inode), -- cgit v1.2.3 From c522e3ad296b7b692ed3960dfde467f2a34b434f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Jan 2022 09:28:41 +0000 Subject: fscache: Add a comment explaining how page-release optimisation works Add a comment into fscache_note_page_release() to explain how the page-release optimisation logic works[1]. It's not entirely obvious as it has nothing to do with whether or not the netfs file contains data. FSCACHE_COOKIE_NO_DATA_TO_READ is set if we have no data in the cache yet (ie. the backing file lookup was negative, the file is 0 length or the cookie got invalidated). It means that we have no data in the cache, not that the file is necessarily empty on the server. FSCACHE_COOKIE_HAVE_DATA is set once we've stored data in the backing file. From that point on, we have data we *could* read - however, it's covered by pages in the netfs pagecache until at such time one of those covering pages is released. So if we've written data to the cache (HAVE_DATA) and there wasn't any data in the cache when we started (NO_DATA_TO_READ), it may no longer be true that we can skip reading from the cache. Read skipping is done by cachefiles_prepare_read(). Note that tracking is not done on a per-page basis, but only on a per-file basis. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/043a206f03929c2667a465314144e518070a9b2d.camel@kernel.org/ [1] Link: https://lore.kernel.org/r/164251408479.3435901.9540165422908194636.stgit@warthog.procyon.org.uk/ # v1 --- include/linux/fscache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index ede50406bcb0..296c5f1d9f35 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -665,6 +665,11 @@ static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie, static inline void fscache_note_page_release(struct fscache_cookie *cookie) { + /* If we've written data to the cache (HAVE_DATA) and there wasn't any + * data in the cache when we started (NO_DATA_TO_READ), it may no + * longer be true that we can skip reading from the cache - so clear + * the flag that causes reads to be skipped. + */ if (cookie && test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) && test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) -- cgit v1.2.3 From ffa65753c43142f3b803486442813744da71cff2 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 21 Jan 2022 22:10:46 -0800 Subject: mm/migrate.c: rework migration_entry_wait() to not take a pageref This fixes the FIXME in migrate_vma_check_page(). Before migrating a page migration code will take a reference and check there are no unexpected page references, failing the migration if there are. When a thread faults on a migration entry it will take a temporary reference to the page to wait for the page to become unlocked signifying the migration entry has been removed. This reference is dropped just prior to waiting on the page lock, however the extra reference can cause migration failures so it is desirable to avoid taking it. As migration code already has a reference to the migrating page an extra reference to wait on PG_locked is unnecessary so long as the reference can't be dropped whilst setting up the wait. When faulting on a migration entry the ptl is taken to check the migration entry. Removing a migration entry also requires the ptl, and migration code won't drop its page reference until after the migration entry has been removed. Therefore retaining the ptl of a migration entry is sufficient to ensure the page has a reference. Reworking migration_entry_wait() to hold the ptl until the wait setup is complete means the extra page reference is no longer needed. [apopple@nvidia.com: v5] Link: https://lkml.kernel.org/r/20211213033848.1973946-1-apopple@nvidia.com Link: https://lkml.kernel.org/r/20211118020754.954425-1-apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: David Hildenbrand Cc: David Howells Cc: Hugh Dickins Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: John Hubbard Cc: Matthew Wilcox (Oracle) Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 4850cc5bf813..db96e10eb8da 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -40,6 +40,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); +void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, + spinlock_t *ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); void folio_migrate_copy(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, -- cgit v1.2.3 From 3ddd9a808cee7284931312f2f3e854c9617f44b2 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:10:50 -0800 Subject: sysctl: add a new register_sysctl_init() interface Patch series "sysctl: first set of kernel/sysctl cleanups", v2. Finally had time to respin the series of the work we had started last year on cleaning up the kernel/sysct.c kitchen sink. People keeps stuffing their sysctls in that file and this creates a maintenance burden. So this effort is aimed at placing sysctls where they actually belong. I'm going to split patches up into series as there is quite a bit of work. This first set adds register_sysctl_init() for uses of registerting a sysctl on the init path, adds const where missing to a few places, generalizes common values so to be more easy to share, and starts the move of a few kernel/sysctl.c out where they belong. The majority of rework on v2 in this first patch set is 0-day fixes. Eric Biederman's feedback is later addressed in subsequent patch sets. I'll only post the first two patch sets for now. We can address the rest once the first two patch sets get completely reviewed / Acked. This patch (of 9): The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. Today though folks heavily rely on tables on kernel/sysctl.c so they can easily just extend this table with their needed sysctls. In order to help users move their sysctls out we need to provide a helper which can be used during code initialization. We special-case the initialization use of register_sysctl() since it *is* safe to fail, given all that sysctls do is provide a dynamic interface to query or modify at runtime an existing variable. So the use case of register_sysctl() on init should *not* stop if the sysctls don't end up getting registered. It would be counter productive to stop boot if a simple sysctl registration failed. Provide a helper for init then, and document the recommended init levels to use for callers of this routine. We will later use this in subsequent patches to start slimming down kernel/sysctl.c tables and moving sysctl registration to the code which actually needs these sysctls. [mcgrof@kernel.org: major commit log and documentation rephrasing also moved to fs/proc/proc_sysctl.c ] Link: https://lkml.kernel.org/r/20211123202347.818157-1-mcgrof@kernel.org Link: https://lkml.kernel.org/r/20211123202347.818157-2-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Reviewed-by: Kees Cook Cc: Iurii Zaikin Cc: "Eric W. Biederman" Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: Paul Turner Cc: Andy Shevchenko Cc: Sebastian Reichel Cc: Tetsuo Handa Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Qing Wang Cc: Benjamin LaHaise Cc: Al Viro Cc: Jan Kara Cc: Amir Goldstein Cc: Stephen Kitt Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Clemens Ladisch Cc: David Airlie Cc: Jani Nikula Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Phillip Potter Cc: Rodrigo Vivi Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1fa2b69c6fc3..d3ab7969b6b5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -199,6 +199,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); +extern void __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name); +#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) void do_sysctl_args(void); extern int pwrsw_enabled; -- cgit v1.2.3 From 78e36f3b0dae586f623c4a37ec5eb5496f5abbe1 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:10:55 -0800 Subject: sysctl: move some boundary constants from sysctl.c to sysctl_vals sysctl has helpers which let us specify boundary values for a min or max int value. Since these are used for a boundary check only they don't change, so move these variables to sysctl_vals to avoid adding duplicate variables. This will help with our cleanup of kernel/sysctl.c. [akpm@linux-foundation.org: update it for "mm/pagealloc: sysctl: change watermark_scale_factor max limit to 30%"] [mcgrof@kernel.org: major rebase] Link: https://lkml.kernel.org/r/20211123202347.818157-3-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Reviewed-by: Kees Cook Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Benjamin LaHaise Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: Jan Kara Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Qing Wang Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Tetsuo Handa Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Clemens Ladisch Cc: David Airlie Cc: Jani Nikula Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Phillip Potter Cc: Rodrigo Vivi Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index d3ab7969b6b5..47cf70c8eb93 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -38,9 +38,16 @@ struct ctl_table_header; struct ctl_dir; /* Keep the same order as in fs/proc/proc_sysctl.c */ -#define SYSCTL_ZERO ((void *)&sysctl_vals[0]) -#define SYSCTL_ONE ((void *)&sysctl_vals[1]) -#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2]) +#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0]) +#define SYSCTL_ZERO ((void *)&sysctl_vals[1]) +#define SYSCTL_ONE ((void *)&sysctl_vals[2]) +#define SYSCTL_TWO ((void *)&sysctl_vals[3]) +#define SYSCTL_FOUR ((void *)&sysctl_vals[4]) +#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5]) +#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6]) +#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7]) +#define SYSCTL_THREE_THOUSAND ((void *)&sysctl_vals[8]) +#define SYSCTL_INT_MAX ((void *)&sysctl_vals[9]) extern const int sysctl_vals[]; -- cgit v1.2.3 From bbe7a10ed83a5fa0b0ff6161ecdc4e65a0e9c993 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:11:00 -0800 Subject: hung_task: move hung_task sysctl interface to hung_task.c The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move hung_task sysctl interface to hung_task.c and use register_sysctl() to register the sysctl interface. [mcgrof@kernel.org: commit log refresh and fixed 2-3 0day reported compile issues] Link: https://lkml.kernel.org/r/20211123202347.818157-4-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Reviewed-by: Kees Cook Reviewed-by: Petr Mladek Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Benjamin LaHaise Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: Jan Kara Cc: Paul Turner Cc: Peter Zijlstra Cc: Qing Wang Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Tetsuo Handa Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Clemens Ladisch Cc: David Airlie Cc: Jani Nikula Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Phillip Potter Cc: Rodrigo Vivi Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/sysctl.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 304f431178fd..c19dd5a2c05c 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -7,20 +7,8 @@ struct ctl_table; #ifdef CONFIG_DETECT_HUNG_TASK - -#ifdef CONFIG_SMP -extern unsigned int sysctl_hung_task_all_cpu_backtrace; -#else -#define sysctl_hung_task_all_cpu_backtrace 0 -#endif /* CONFIG_SMP */ - -extern int sysctl_hung_task_check_count; -extern unsigned int sysctl_hung_task_panic; +/* used for hung_task and block/ */ extern unsigned long sysctl_hung_task_timeout_secs; -extern unsigned long sysctl_hung_task_check_interval_secs; -extern int sysctl_hung_task_warnings; -int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); #else /* Avoid need for ifdefs elsewhere in the code */ enum { sysctl_hung_task_timeout_secs = 0 }; -- cgit v1.2.3 From 86b12b6c5d6b46e64bf2e8080528781032e4bd90 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:11:24 -0800 Subject: aio: move aio sysctl to aio.c The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. Move aio sysctl to aio.c and use the new register_sysctl_init() to register the sysctl interface for aio. [mcgrof@kernel.org: adjust commit log to justify the move] Link: https://lkml.kernel.org/r/20211123202347.818157-9-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Reviewed-by: Jan Kara Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Benjamin LaHaise Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: Kees Cook Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Qing Wang Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Tetsuo Handa Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Clemens Ladisch Cc: David Airlie Cc: Jani Nikula Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Phillip Potter Cc: Rodrigo Vivi Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/aio.h b/include/linux/aio.h index b83e68dd006f..86892a4fe7c8 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -20,8 +20,4 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) { } #endif /* CONFIG_AIO */ -/* for sysctl: */ -extern unsigned long aio_nr; -extern unsigned long aio_max_nr; - #endif /* __LINUX__AIO_H */ -- cgit v1.2.3 From 49a4de75719b6c0f1f375df9908a95cef1e34945 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:11:29 -0800 Subject: dnotify: move dnotify sysctl to dnotify.c The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move dnotify sysctls to dnotify.c and use the new register_sysctl_init() to register the sysctl interface. [mcgrof@kernel.org: adjust the commit log to justify the move] Link: https://lkml.kernel.org/r/20211123202347.818157-10-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Acked-by: Jan Kara Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Benjamin LaHaise Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: Kees Cook Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Qing Wang Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Tetsuo Handa Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Clemens Ladisch Cc: David Airlie Cc: Jani Nikula Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Phillip Potter Cc: Rodrigo Vivi Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dnotify.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index b87c3b85a166..b1d26f9f1c9f 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -29,7 +29,6 @@ struct dnotify_struct { FS_CREATE | FS_RENAME |\ FS_MOVED_FROM | FS_MOVED_TO) -extern int dir_notify_enable; extern void dnotify_flush(struct file *, fl_owner_t); extern int fcntl_dirnotify(int, struct file *, unsigned long); -- cgit v1.2.3 From 7b9ad122b52c9839e1f68f16c907990a6ad6f793 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:11:59 -0800 Subject: inotify: simplify subdirectory registration with register_sysctl() There is no need to user boiler plate code to specify a set of base directories we're going to stuff sysctls under. Simplify this by using register_sysctl() and specifying the directory path directly. Move inotify_user sysctl to inotify_user.c while at it to remove clutter from kernel/sysctl.c. [mcgrof@kernel.org: remember to register fanotify_table] Link: https://lkml.kernel.org/r/YZ5A6iWLb0h3N3RC@bombadil.infradead.org [mcgrof@kernel.org: update commit log to reflect new path we decided to take] Link: https://lkml.kernel.org/r/20211123202422.819032-7-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Benjamin LaHaise Cc: Clemens Ladisch Cc: David Airlie Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: Jani Nikula Cc: Jan Kara Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Kees Cook Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Phillip Potter Cc: Qing Wang Cc: Rodrigo Vivi Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Tetsuo Handa Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fanotify.h | 2 -- include/linux/inotify.h | 3 --- 2 files changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 3afdf339d53c..419cadcd7ff5 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -5,8 +5,6 @@ #include #include -extern struct ctl_table fanotify_table[]; /* for sysctl */ - #define FAN_GROUP_FLAG(group, flag) \ ((group)->fanotify_data.flags & (flag)) diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 6a24905f6e1e..8d20caa1b268 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -7,11 +7,8 @@ #ifndef _LINUX_INOTIFY_H #define _LINUX_INOTIFY_H -#include #include -extern struct ctl_table inotify_table[]; /* for sysctl */ - #define ALL_INOTIFY_BITS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \ IN_MOVED_TO | IN_CREATE | IN_DELETE | \ -- cgit v1.2.3 From a8f5de894f76f1c73f4a068d04897a5e2f873825 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:12:09 -0800 Subject: eventpoll: simplify sysctl declaration with register_sysctl() The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the epoll_table sysctl to fs/eventpoll.c and use register_sysctl(). Link: https://lkml.kernel.org/r/20211123202422.819032-9-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Benjamin LaHaise Cc: Clemens Ladisch Cc: David Airlie Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: Jani Nikula Cc: Jan Kara Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Kees Cook Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Phillip Potter Cc: Qing Wang Cc: Rodrigo Vivi Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Tetsuo Handa Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poll.h | 2 -- include/linux/sysctl.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/poll.h b/include/linux/poll.h index 1cdc32b1f1b0..a9e0e1c2d1f2 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -8,12 +8,10 @@ #include #include #include -#include #include #include #include -extern struct ctl_table epoll_table[]; /* for sysctl */ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ #ifdef __clang__ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 47cf70c8eb93..6dd0f277f844 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -219,7 +219,6 @@ extern int no_unaligned_warning; extern struct ctl_table sysctl_mount_point[]; extern struct ctl_table random_table[]; extern struct ctl_table firmware_config_table[]; -extern struct ctl_table epoll_table[]; #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) -- cgit v1.2.3 From 6aad36d421d8bfe156508fa4edfe67827234cf0f Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:12:13 -0800 Subject: firmware_loader: move firmware sysctl to its own files Patch series "sysctl: 3rd set of kernel/sysctl cleanups", v2. This is the third set of patches to help address cleaning the kitchen seink in kernel/sysctl.c and to move sysctls away to where they are actually implemented / used. This patch (of 8): kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the firmware configuration sysctl table to the only place where it is used, and make it clear that if sysctls are disabled this is not used. [akpm@linux-foundation.org: export register_firmware_config_sysctl and unregister_firmware_config_sysctl to modules] [akpm@linux-foundation.org: use EXPORT_SYMBOL_NS_GPL instead] [sfr@canb.auug.org.au: fix that so it compiles] Link: https://lkml.kernel.org/r/20211201160626.401d828d@canb.auug.org.au [mcgrof@kernel.org: major commit log update to justify the move] Link: https://lkml.kernel.org/r/20211124231435.1445213-1-mcgrof@kernel.org Link: https://lkml.kernel.org/r/20211124231435.1445213-2-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Signed-off-by: Stephen Rothwell Cc: Kees Cook Cc: Iurii Zaikin Cc: Eric Biederman Cc: Stephen Kitt Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: "Theodore Ts'o" Cc: Al Viro Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Steven Rostedt (VMware) Cc: John Ogness Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: Lukas Middendorf Cc: Antti Palosaari Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Benjamin LaHaise Cc: Clemens Ladisch Cc: David Airlie Cc: Jani Nikula Cc: Jani Nikula Cc: Jan Kara Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Mark Fasheh Cc: Paul Turner Cc: Peter Zijlstra Cc: Phillip Potter Cc: Qing Wang Cc: Rodrigo Vivi Cc: Sebastian Reichel Cc: Suren Baghdasaryan Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6dd0f277f844..3985e9c80155 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -218,7 +218,6 @@ extern int no_unaligned_warning; extern struct ctl_table sysctl_mount_point[]; extern struct ctl_table random_table[]; -extern struct ctl_table firmware_config_table[]; #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) -- cgit v1.2.3 From 5475e8f03c80bbce7b43a57d861f5acc44a60b22 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:12:18 -0800 Subject: random: move the random sysctl declarations to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the random sysctls to their own file and use register_sysctl_init(). [mcgrof@kernel.org: commit log update to justify the move] Link: https://lkml.kernel.org/r/20211124231435.1445213-3-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Benjamin LaHaise Cc: Clemens Ladisch Cc: David Airlie Cc: Douglas Gilbert Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: James E.J. Bottomley Cc: Jani Nikula Cc: Jani Nikula Cc: Jan Kara Cc: Joel Becker Cc: John Ogness Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Kees Cook Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Martin K. Petersen Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Phillip Potter Cc: Qing Wang Cc: "Rafael J. Wysocki" Cc: Rodrigo Vivi Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: Tetsuo Handa Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3985e9c80155..fce05a060bc5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -217,7 +217,6 @@ extern int unaligned_dump_stack; extern int no_unaligned_warning; extern struct ctl_table sysctl_mount_point[]; -extern struct ctl_table random_table[]; #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) -- cgit v1.2.3 From ee9efac48a082904d17a20131aa73d82f058cdd6 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:12:23 -0800 Subject: sysctl: add helper to register a sysctl mount point The way to create a subdirectory on top of sysctl_mount_point is a bit obscure, and *why* we do that even so more. Provide a helper which makes it clear why we do this. [akpm@linux-foundation.org: export register_sysctl_mount_point() to modules] Link: https://lkml.kernel.org/r/20211124231435.1445213-4-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Suggested-by: "Eric W. Biederman" Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Benjamin LaHaise Cc: Clemens Ladisch Cc: David Airlie Cc: Douglas Gilbert Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: James E.J. Bottomley Cc: Jani Nikula Cc: Jani Nikula Cc: Jan Kara Cc: Joel Becker Cc: John Ogness Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Kees Cook Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Martin K. Petersen Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Phillip Potter Cc: Qing Wang Cc: "Rafael J. Wysocki" Cc: Rodrigo Vivi Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: Tetsuo Handa Cc: "Theodore Ts'o" Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index fce05a060bc5..746c098a6ff5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -209,6 +209,8 @@ extern int sysctl_init(void); extern void __register_sysctl_init(const char *path, struct ctl_table *table, const char *table_name); #define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) +extern struct ctl_table_header *register_sysctl_mount_point(const char *path); + void do_sysctl_args(void); extern int pwrsw_enabled; @@ -224,6 +226,11 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * return NULL; } +static inline struct sysctl_header *register_sysctl_mount_point(const char *path) +{ + return NULL; +} + static inline struct ctl_table_header *register_sysctl_paths( const struct ctl_path *path, struct ctl_table *table) { -- cgit v1.2.3 From 26d1c80fd61e59d5e2eb6fda00e0148a6704ddeb Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:12:38 -0800 Subject: scsi/sg: move sg-big-buff sysctl to scsi/sg.c kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the sg-big-buff sysctl from kernel/sysctl.c to drivers/scsi/sg.c and use register_sysctl() to register the sysctl interface. [mcgrof@kernel.org: commit log update] Link: https://lkml.kernel.org/r/20211124231435.1445213-7-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Benjamin LaHaise Cc: Clemens Ladisch Cc: David Airlie Cc: Douglas Gilbert Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: James E.J. Bottomley Cc: Jani Nikula Cc: Jani Nikula Cc: Jan Kara Cc: Joel Becker Cc: John Ogness Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Kees Cook Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Martin K. Petersen Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Phillip Potter Cc: Qing Wang Cc: "Rafael J. Wysocki" Cc: Rodrigo Vivi Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: Tetsuo Handa Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/scsi/sg.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/scsi/sg.h b/include/scsi/sg.h index 843cefb8efce..068e35d36557 100644 --- a/include/scsi/sg.h +++ b/include/scsi/sg.h @@ -29,10 +29,6 @@ * For utility and test programs see: http://sg.danny.cz/sg/sg3_utils.html */ -#ifdef __KERNEL__ -extern int sg_big_buff; /* for sysctl */ -#endif - typedef struct sg_iovec /* same structure as used by readv() Linux system */ { /* call. It defines one scatter-gather element. */ -- cgit v1.2.3 From 0df8bdd5e3b3e557ce2c2575fce0c64c5dd1045a Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:12:43 -0800 Subject: stackleak: move stack_erasing sysctl to stackleak.c kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the stack_erasing sysctl from kernel/sysctl.c to kernel/stackleak.c and use register_sysctl() to register the sysctl interface. [mcgrof@kernel.org: commit log update] Link: https://lkml.kernel.org/r/20211124231435.1445213-8-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Benjamin LaHaise Cc: Clemens Ladisch Cc: David Airlie Cc: Douglas Gilbert Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: James E.J. Bottomley Cc: Jani Nikula Cc: Jani Nikula Cc: Jan Kara Cc: Joel Becker Cc: John Ogness Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Kees Cook Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Martin K. Petersen Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Phillip Potter Cc: Qing Wang Cc: "Rafael J. Wysocki" Cc: Rodrigo Vivi Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: Tetsuo Handa Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stackleak.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index a59db2f08e76..ccaab2043fcd 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -23,11 +23,6 @@ static inline void stackleak_task_init(struct task_struct *t) # endif } -#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE -int stack_erasing_sysctl(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -#endif - #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ static inline void stackleak_task_init(struct task_struct *t) { } #endif -- cgit v1.2.3 From b1f2aff888af54a057c2c3c0d88a13ef5d37b52a Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:12:48 -0800 Subject: sysctl: share unsigned long const values Provide a way to share unsigned long values. This will allow others to not have to re-invent these values. Link: https://lkml.kernel.org/r/20211124231435.1445213-9-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Amir Goldstein Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Benjamin LaHaise Cc: Clemens Ladisch Cc: David Airlie Cc: Douglas Gilbert Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Iurii Zaikin Cc: James E.J. Bottomley Cc: Jani Nikula Cc: Jani Nikula Cc: Jan Kara Cc: Joel Becker Cc: John Ogness Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Kees Cook Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Martin K. Petersen Cc: Paul Turner Cc: Peter Zijlstra Cc: Petr Mladek Cc: Phillip Potter Cc: Qing Wang Cc: "Rafael J. Wysocki" Cc: Rodrigo Vivi Cc: Sebastian Reichel Cc: Sergey Senozhatsky Cc: Stephen Kitt Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: Tetsuo Handa Cc: "Theodore Ts'o" Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 746c098a6ff5..2de6d20d191b 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -51,6 +51,12 @@ struct ctl_dir; extern const int sysctl_vals[]; +#define SYSCTL_LONG_ZERO ((void *)&sysctl_long_vals[0]) +#define SYSCTL_LONG_ONE ((void *)&sysctl_long_vals[1]) +#define SYSCTL_LONG_MAX ((void *)&sysctl_long_vals[2]) + +extern const unsigned long sysctl_long_vals[]; + typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); -- cgit v1.2.3 From 1d67fe585049d3e2448b997af78c68cbf90ada09 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:12:52 -0800 Subject: fs: move inode sysctls to its own file Patch series "sysctl: 4th set of kernel/sysctl cleanups". This is slimming down the fs uses of kernel/sysctl.c to the point that the next step is to just get rid of the fs base directory for it and move that elsehwere, so that next patch series starts dealing with that to demo how we can end up cleaning up a full base directory from kernel/sysctl.c, one at a time. This patch (of 9): kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the inode sysctls to its own file. Since we are no longer using this outside of fs/ remove the extern declaration of its respective proc helper. We use early_initcall() as it is the earliest we can use. [arnd@arndb.de: avoid unused-variable warning] Link: https://lkml.kernel.org/r/20211203190123.874239-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20211129205548.605569-1-mcgrof@kernel.org Link: https://lkml.kernel.org/r/20211129205548.605569-2-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Signed-off-by: Arnd Bergmann Cc: Al Viro Cc: Kees Cook Cc: Iurii Zaikin Cc: Xiaoming Ni Cc: Eric Biederman Cc: Stephen Kitt Cc: Lukas Middendorf Cc: Antti Palosaari Cc: Andy Shevchenko Cc: Jeff Layton Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c8510da6cc6d..044de67c8167 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -82,7 +82,6 @@ extern void __init files_maxfiles_init(void); extern struct files_stat_struct files_stat; extern unsigned long get_max_files(void); extern unsigned int sysctl_nr_open; -extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; extern int sysctl_protected_symlinks; extern int sysctl_protected_hardlinks; @@ -3537,8 +3536,6 @@ int proc_nr_files(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_nr_dentry(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int proc_nr_inodes(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) -- cgit v1.2.3 From 204d5a24e15562b2816825c0f9b49d26814b77be Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:12:56 -0800 Subject: fs: move fs stat sysctls to file_table.c kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. We can create the sysctl dynamically on early init for fs stat to help with this clutter. This dusts off the fs stat syctls knobs and puts them into where they are declared. Link: https://lkml.kernel.org/r/20211129205548.605569-3-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Eric Biederman Cc: Iurii Zaikin Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Kees Cook Cc: Lukas Middendorf Cc: Stephen Kitt Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 044de67c8167..1e6761966120 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,7 +79,6 @@ extern void __init inode_init_early(void); extern void __init files_init(void); extern void __init files_maxfiles_init(void); -extern struct files_stat_struct files_stat; extern unsigned long get_max_files(void); extern unsigned int sysctl_nr_open; extern int leases_enable, lease_break_time; @@ -3532,8 +3531,6 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; -int proc_nr_files(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); int proc_nr_dentry(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int __init list_bdev_fs_names(char *buf, size_t size); -- cgit v1.2.3 From c8c0c239d5ab1e3e8d2bb0453ce642fe2c6357ec Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:12:59 -0800 Subject: fs: move dcache sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the dcache sysctl clutter out of kernel/sysctl.c. This is a small one-off entry, perhaps later we can simplify this representation, but for now we use the helpers we have. We won't know how we can simplify this further untl we're fully done with the cleanup. [arnd@arndb.de: avoid unused-function warning] Link: https://lkml.kernel.org/r/20211203190123.874239-2-arnd@kernel.org Link: https://lkml.kernel.org/r/20211129205548.605569-4-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Signed-off-by: Arnd Bergmann Cc: Al Viro Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Eric Biederman Cc: Iurii Zaikin Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Kees Cook Cc: Lukas Middendorf Cc: Stephen Kitt Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 10 ---------- include/linux/fs.h | 2 -- 2 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9e23d33bb6f1..f5bba51480b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -61,16 +61,6 @@ extern const struct qstr empty_name; extern const struct qstr slash_name; extern const struct qstr dotdot_name; -struct dentry_stat_t { - long nr_dentry; - long nr_unused; - long age_limit; /* age in seconds */ - long want_pages; /* pages requested by system */ - long nr_negative; /* # of unused negative dentries */ - long dummy; /* Reserved for future use */ -}; -extern struct dentry_stat_t dentry_stat; - /* * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the diff --git a/include/linux/fs.h b/include/linux/fs.h index 1e6761966120..9b856d5da9e2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3531,8 +3531,6 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; -int proc_nr_dentry(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) -- cgit v1.2.3 From 54771613e8a7dbbba2a205ddf1b33e25a290b3fd Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:13:03 -0800 Subject: sysctl: move maxolduid as a sysctl specific const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The maxolduid value is only shared for sysctl purposes for use on a max range. Just stuff this into our shared const array. [akpm@linux-foundation.org: fix sysctl_vals[], per Mickaël] Link: https://lkml.kernel.org/r/20211129205548.605569-5-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Signed-off-by: Mickaël Salaün Cc: Al Viro Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Eric Biederman Cc: Iurii Zaikin Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Kees Cook Cc: Lukas Middendorf Cc: Stephen Kitt Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 2de6d20d191b..bb921eb8a02d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -49,6 +49,9 @@ struct ctl_dir; #define SYSCTL_THREE_THOUSAND ((void *)&sysctl_vals[8]) #define SYSCTL_INT_MAX ((void *)&sysctl_vals[9]) +/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ +#define SYSCTL_MAXOLDUID ((void *)&sysctl_vals[10]) + extern const int sysctl_vals[]; #define SYSCTL_LONG_ZERO ((void *)&sysctl_long_vals[0]) -- cgit v1.2.3 From dd81faa88340a1fe8cd81c8ecbadd8e95c58549c Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:13:10 -0800 Subject: fs: move locking sysctls where they are used kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. The locking fs sysctls are only used on fs/locks.c, so move them there. Link: https://lkml.kernel.org/r/20211129205548.605569-7-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Eric Biederman Cc: Iurii Zaikin Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Kees Cook Cc: Lukas Middendorf Cc: Stephen Kitt Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9b856d5da9e2..0e08c3dd8f75 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -82,10 +82,6 @@ extern void __init files_maxfiles_init(void); extern unsigned long get_max_files(void); extern unsigned int sysctl_nr_open; extern int leases_enable, lease_break_time; -extern int sysctl_protected_symlinks; -extern int sysctl_protected_hardlinks; -extern int sysctl_protected_fifos; -extern int sysctl_protected_regular; typedef __kernel_rwf_t rwf_t; -- cgit v1.2.3 From 9c011be132972ff94bde2ae99064e29f94e85c68 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:13:13 -0800 Subject: fs: move namei sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move namei's own sysctl knobs to its own file. Other than the move we also avoid initializing two static variables to 0 as this is not needed: * sysctl_protected_symlinks * sysctl_protected_hardlinks Link: https://lkml.kernel.org/r/20211129205548.605569-8-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Eric Biederman Cc: Iurii Zaikin Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Kees Cook Cc: Lukas Middendorf Cc: Stephen Kitt Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0e08c3dd8f75..9617dea24978 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -81,7 +81,6 @@ extern void __init files_maxfiles_init(void); extern unsigned long get_max_files(void); extern unsigned int sysctl_nr_open; -extern int leases_enable, lease_break_time; typedef __kernel_rwf_t rwf_t; -- cgit v1.2.3 From 1998f19324d24df7de4e74d81503b4299eb99e7d Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:13:20 -0800 Subject: fs: move pipe sysctls to is own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the pipe sysctls to its own file. Link: https://lkml.kernel.org/r/20211129205548.605569-10-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Andy Shevchenko Cc: Antti Palosaari Cc: Eric Biederman Cc: Iurii Zaikin Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Kees Cook Cc: Lukas Middendorf Cc: Stephen Kitt Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 4 ---- include/linux/sysctl.h | 6 ++++++ 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index fc5642431b92..c00c618ef290 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -238,10 +238,6 @@ void pipe_lock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); -extern unsigned int pipe_max_size; -extern unsigned long pipe_user_pages_hard; -extern unsigned long pipe_user_pages_soft; - /* Wait for a pipe to be readable/writable while dropping the pipe lock */ void pipe_wait_readable(struct pipe_inode_info *); void pipe_wait_writable(struct pipe_inode_info *); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index bb921eb8a02d..4294e9668bd5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -221,6 +221,12 @@ extern void __register_sysctl_init(const char *path, struct ctl_table *table, extern struct ctl_table_header *register_sysctl_mount_point(const char *path); void do_sysctl_args(void); +int do_proc_douintvec(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos, + int (*conv)(unsigned long *lvalp, + unsigned int *valp, + int write, void *data), + void *data); extern int pwrsw_enabled; extern int unaligned_enabled; -- cgit v1.2.3 From 51cb8dfc5a5c39e6c70376b9dc9a14d624a9d271 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:13:24 -0800 Subject: sysctl: add and use base directory declarer and registration helper Patch series "sysctl: add and use base directory declarer and registration helper". In this patch series we start addressing base directories, and so we start with the "fs" sysctls. The end goal is we end up completely moving all "fs" sysctl knobs out from kernel/sysctl. This patch (of 6): Add a set of helpers which can be used to declare and register base directory sysctls on their own. We do this so we can later move each of the base sysctl directories like "fs", "kernel", etc, to their own respective files instead of shoving the declarations and registrations all on kernel/sysctl.c. The lazy approach has caught up and with this, we just end up extending the list of base directories / sysctls on one file and this makes maintenance difficult due to merge conflicts from many developers. The declarations are used first by kernel/sysctl.c for registration its own base which over time we'll try to clean up. It will be used in the next patch to demonstrate how to cleanly deal with base sysctl directories. [mcgrof@kernel.org: null-terminate the ctl_table arrays] Link: https://lkml.kernel.org/r/YafJY3rXDYnjK/gs@bombadil.infradead.org Link: https://lkml.kernel.org/r/20211129211943.640266-1-mcgrof@kernel.org Link: https://lkml.kernel.org/r/20211129211943.640266-2-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Kees Cook Cc: Iurii Zaikin Cc: Xiaoming Ni Cc: Eric Biederman Cc: Stephen Kitt Cc: Lukas Middendorf Cc: Antti Palosaari Cc: Christian Brauner Cc: Eric Biggers Cc: "Naveen N. Rao" Cc: "David S. Miller" Cc: Masami Hiramatsu Cc: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 4294e9668bd5..02134a8abad7 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -194,6 +194,20 @@ struct ctl_path { #ifdef CONFIG_SYSCTL +#define DECLARE_SYSCTL_BASE(_name, _table) \ +static struct ctl_table _name##_base_table[] = { \ + { \ + .procname = #_name, \ + .mode = 0555, \ + .child = _table, \ + }, \ + { }, \ +} + +extern int __register_sysctl_base(struct ctl_table *base_table); + +#define register_sysctl_base(_name) __register_sysctl_base(_name##_base_table) + void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, @@ -236,6 +250,16 @@ extern int no_unaligned_warning; extern struct ctl_table sysctl_mount_point[]; #else /* CONFIG_SYSCTL */ + +#define DECLARE_SYSCTL_BASE(_name, _table) + +static inline int __register_sysctl_base(struct ctl_table *base_table) +{ + return 0; +} + +#define register_sysctl_base(table) __register_sysctl_base(table) + static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { return NULL; -- cgit v1.2.3 From ab171b952c6e065779687b44041038efdadb3915 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:13:27 -0800 Subject: fs: move namespace sysctls and declare fs base directory This moves the namespace sysctls to its own file as part of the kernel/sysctl.c spring cleaning Since we have now removed all sysctls for "fs", we now have to declare it on the filesystem code, we do that using the new helper, which reduces boiler plate code. We rename init_fs_shared_sysctls() to init_fs_sysctls() to reflect that now fs/sysctls.c is taking on the burden of being the first to register the base directory as well. Lastly, since init code will load in the order in which we link it we have to move the sysctl code to be linked in early, so that its early init routine runs prior to other fs code. This way, other filesystem code can register their own sysctls using the helpers after this: * register_sysctl_init() * register_sysctl() Link: https://lkml.kernel.org/r/20211129211943.640266-3-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Anil S Keshavamurthy Cc: Antti Palosaari Cc: Christian Brauner Cc: "David S. Miller" Cc: Eric Biederman Cc: Eric Biggers Cc: Iurii Zaikin Cc: Kees Cook Cc: Lukas Middendorf Cc: Masami Hiramatsu Cc: "Naveen N. Rao" Cc: Stephen Kitt Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mount.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 5d92a7e1a742..7f18a7555dff 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -113,9 +113,6 @@ extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); extern dev_t name_to_dev_t(const char *name); - -extern unsigned int sysctl_mount_max; - extern bool path_is_mountpoint(const struct path *path); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); -- cgit v1.2.3 From d8c0418aac78e661b5283c9d6a1dfc61d44f26fd Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 21 Jan 2022 22:13:31 -0800 Subject: kernel/sysctl.c: rename sysctl_init() to sysctl_init_bases() Rename sysctl_init() to sysctl_init_bases() so to reflect exactly what this is doing. Link: https://lkml.kernel.org/r/20211129211943.640266-4-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Anil S Keshavamurthy Cc: Antti Palosaari Cc: Christian Brauner Cc: "David S. Miller" Cc: Eric Biederman Cc: Eric Biggers Cc: Iurii Zaikin Cc: Kees Cook Cc: Lukas Middendorf Cc: Masami Hiramatsu Cc: "Naveen N. Rao" Cc: Stephen Kitt Cc: Xiaoming Ni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 02134a8abad7..180adf7da785 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -228,7 +228,7 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); -extern int sysctl_init(void); +extern int sysctl_init_bases(void); extern void __register_sysctl_init(const char *path, struct ctl_table *table, const char *table_name); #define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) -- cgit v1.2.3 From fdcd4073fccc6f989308be3f1d61d8a68cd990ce Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:13:34 -0800 Subject: printk: fix build warning when CONFIG_PRINTK=n build warning when CONFIG_PRINTK=n kernel/printk/printk.c:175:5: warning: no previous prototype for 'devkmsg_sysctl_set_loglvl' [-Wmissing-prototypes] devkmsg_sysctl_set_loglvl() is only used in sysctl.c when CONFIG_PRINTK=y, but it participates in the build when CONFIG_PRINTK=n. So add compile dependency CONFIG_PRINTK=y && CONFIG_SYSCTL=y to fix the build warning. Link: https://lkml.kernel.org/r/20211129211943.640266-5-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Anil S Keshavamurthy Cc: Antti Palosaari Cc: Christian Brauner Cc: "David S. Miller" Cc: Eric Biederman Cc: Eric Biggers Cc: Iurii Zaikin Cc: Kees Cook Cc: Lukas Middendorf Cc: Masami Hiramatsu Cc: "Naveen N. Rao" Cc: Stephen Kitt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 9497f6b98339..1522df223c0f 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -183,10 +183,6 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, extern int printk_delay_msec; extern int dmesg_restrict; -extern int -devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf, - size_t *lenp, loff_t *ppos); - extern void wake_up_klogd(void); char *log_buf_addr_get(void); -- cgit v1.2.3 From f0bc21b268c1464603192a00851cdbbf7c2cdc36 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:13:38 -0800 Subject: fs/coredump: move coredump sysctls into its own file This moves the fs/coredump.c respective sysctls to its own file. Link: https://lkml.kernel.org/r/20211129211943.640266-6-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Anil S Keshavamurthy Cc: Antti Palosaari Cc: Christian Brauner Cc: "David S. Miller" Cc: Eric Biederman Cc: Eric Biggers Cc: Iurii Zaikin Cc: Kees Cook Cc: Lukas Middendorf Cc: Masami Hiramatsu Cc: "Naveen N. Rao" Cc: Stephen Kitt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coredump.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 78fcd776b185..248a68c668b4 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -14,10 +14,6 @@ struct core_vma_metadata { unsigned long dump_size; }; -extern int core_uses_pid; -extern char core_pattern[]; -extern unsigned int core_pipe_limit; - /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. @@ -37,4 +33,10 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); static inline void do_coredump(const kernel_siginfo_t *siginfo) {} #endif +#if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL) +extern void validate_coredump_safety(void); +#else +static inline void validate_coredump_safety(void) {} +#endif + #endif /* _LINUX_COREDUMP_H */ -- cgit v1.2.3 From a737a3c6744bc822d1e6a837fef550e665ddf877 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 21 Jan 2022 22:13:41 -0800 Subject: kprobe: move sysctl_kprobes_optimization to kprobes.c kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. Move sysctl_kprobes_optimization from kernel/sysctl.c to kernel/kprobes.c. Use register_sysctl() to register the sysctl interface. [mcgrof@kernel.org: fix compile issue when CONFIG_OPTPROBES is disabled] Link: https://lkml.kernel.org/r/20211129211943.640266-7-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Cc: Al Viro Cc: Anil S Keshavamurthy Cc: Antti Palosaari Cc: Christian Brauner Cc: "David S. Miller" Cc: Eric Biederman Cc: Eric Biggers Cc: Iurii Zaikin Cc: Kees Cook Cc: Lukas Middendorf Cc: Masami Hiramatsu Cc: "Naveen N. Rao" Cc: Stephen Kitt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 8c8f7a4d93af..19b884353b15 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -348,12 +348,6 @@ extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); DEFINE_INSN_CACHE_OPS(optinsn); -#ifdef CONFIG_SYSCTL -extern int sysctl_kprobes_optimization; -extern int proc_kprobes_optimization_handler(struct ctl_table *table, - int write, void *buffer, - size_t *length, loff_t *ppos); -#endif /* CONFIG_SYSCTL */ extern void wait_for_kprobe_optimizer(void); #else /* !CONFIG_OPTPROBES */ static inline void wait_for_kprobe_optimizer(void) { } -- cgit v1.2.3 From 4a57d6bbaecd28c8175dc5da013009e4158018c2 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 21 Jan 2022 22:14:10 -0800 Subject: locking/rwlocks: introduce write_lock_nested In preparation for converting bit_spin_lock to rwlock in zsmalloc so that multiple writers of zspages can run at the same time but those zspages are supposed to be different zspage instance. Thus, it's not deadlock. This patch adds write_lock_nested to support the case for LOCKDEP. [minchan@kernel.org: fix write_lock_nested for RT] Link: https://lkml.kernel.org/r/YZfrMTAXV56HFWJY@google.com [bigeasy@linutronix.de: fixup write_lock_nested() implementation] Link: https://lkml.kernel.org/r/20211123170134.y6xb7pmpgdn4m3bn@linutronix.de Link: https://lkml.kernel.org/r/20211115185909.3949505-8-minchan@kernel.org Signed-off-by: Minchan Kim Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Acked-by: Sebastian Andrzej Siewior Tested-by: Sebastian Andrzej Siewior Cc: Mike Galbraith Cc: Sergey Senozhatsky Cc: Thomas Gleixner Cc: Naresh Kamboju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rwlock.h | 6 ++++++ include/linux/rwlock_api_smp.h | 8 ++++++++ include/linux/rwlock_rt.h | 10 ++++++++++ include/linux/spinlock_api_up.h | 1 + 4 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 2c0ad417ce3c..8f416c5e929e 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -55,6 +55,12 @@ do { \ #define write_lock(lock) _raw_write_lock(lock) #define read_lock(lock) _raw_read_lock(lock) +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define write_lock_nested(lock, subclass) _raw_write_lock_nested(lock, subclass) +#else +#define write_lock_nested(lock, subclass) _raw_write_lock(lock) +#endif + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) #define read_lock_irqsave(lock, flags) \ diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index f1db6f17c4fb..dceb0a59b692 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -17,6 +17,7 @@ void __lockfunc _raw_read_lock(rwlock_t *lock) __acquires(lock); void __lockfunc _raw_write_lock(rwlock_t *lock) __acquires(lock); +void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass) __acquires(lock); void __lockfunc _raw_read_lock_bh(rwlock_t *lock) __acquires(lock); void __lockfunc _raw_write_lock_bh(rwlock_t *lock) __acquires(lock); void __lockfunc _raw_read_lock_irq(rwlock_t *lock) __acquires(lock); @@ -209,6 +210,13 @@ static inline void __raw_write_lock(rwlock_t *lock) LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); } +static inline void __raw_write_lock_nested(rwlock_t *lock, int subclass) +{ + preempt_disable(); + rwlock_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); +} + #endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */ static inline void __raw_write_unlock(rwlock_t *lock) diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h index 49c1f3842ed5..8544ff05e594 100644 --- a/include/linux/rwlock_rt.h +++ b/include/linux/rwlock_rt.h @@ -28,6 +28,7 @@ extern void rt_read_lock(rwlock_t *rwlock); extern int rt_read_trylock(rwlock_t *rwlock); extern void rt_read_unlock(rwlock_t *rwlock); extern void rt_write_lock(rwlock_t *rwlock); +extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass); extern int rt_write_trylock(rwlock_t *rwlock); extern void rt_write_unlock(rwlock_t *rwlock); @@ -83,6 +84,15 @@ static __always_inline void write_lock(rwlock_t *rwlock) rt_write_lock(rwlock); } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static __always_inline void write_lock_nested(rwlock_t *rwlock, int subclass) +{ + rt_write_lock_nested(rwlock, subclass); +} +#else +#define write_lock_nested(lock, subclass) rt_write_lock(((void)(subclass), (lock))) +#endif + static __always_inline void write_lock_bh(rwlock_t *rwlock) { local_bh_disable(); diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index d0d188861ad6..b8ba00ccccde 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -59,6 +59,7 @@ #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) #define _raw_read_lock(lock) __LOCK(lock) #define _raw_write_lock(lock) __LOCK(lock) +#define _raw_write_lock_nested(lock, subclass) __LOCK(lock) #define _raw_spin_lock_bh(lock) __LOCK_BH(lock) #define _raw_read_lock_bh(lock) __LOCK_BH(lock) #define _raw_write_lock_bh(lock) __LOCK_BH(lock) -- cgit v1.2.3 From 6dfbbae14a7b961f41d80a106e1ab60e86d061c5 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 21 Jan 2022 22:14:20 -0800 Subject: fs: proc: store PDE()->data into inode->i_private PDE_DATA(inode) is introduced to get user private data and hide the layout of struct proc_dir_entry. The inode->i_private is used to do the same thing as well. Save a copy of user private data to inode-> i_private when proc inode is allocated. This means the user also can get their private data by inode->i_private. Introduce pde_data() to wrap inode->i_private so that we can remove PDE_DATA() from fs/proc/generic.c and make PTE_DATE() as a wrapper of pde_data(). It will be easier if we decide to remove PDE_DATE() in the future. Link: https://lkml.kernel.org/r/20211124081956.87711-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Christian Brauner Cc: Alexey Dobriyan Cc: Alexey Gladkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 01b9268451a8..b6e7005cc1b2 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -110,7 +110,18 @@ extern struct proc_dir_entry *proc_create_data(const char *, umode_t, struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops); extern void proc_set_size(struct proc_dir_entry *, loff_t); extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t); -extern void *PDE_DATA(const struct inode *); + +/* + * Obtain the private data passed by user through proc_create_data() or + * related. + */ +static inline void *pde_data(const struct inode *inode) +{ + return inode->i_private; +} + +#define PDE_DATA(i) pde_data(i) + extern void *proc_get_parent_data(const struct inode *); extern void proc_remove(struct proc_dir_entry *); extern void remove_proc_entry(const char *, struct proc_dir_entry *); -- cgit v1.2.3 From 359745d78351c6f5442435f81549f0207ece28aa Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 21 Jan 2022 22:14:23 -0800 Subject: proc: remove PDE_DATA() completely Remove PDE_DATA() completely and replace it with pde_data(). [akpm@linux-foundation.org: fix naming clash in drivers/nubus/proc.c] [akpm@linux-foundation.org: now fix it properly] Link: https://lkml.kernel.org/r/20211124081956.87711-2-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Christian Brauner Cc: Alexey Dobriyan Cc: Alexey Gladkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 4 +--- include/linux/seq_file.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index b6e7005cc1b2..81d6e4ec2294 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -120,8 +120,6 @@ static inline void *pde_data(const struct inode *inode) return inode->i_private; } -#define PDE_DATA(i) pde_data(i) - extern void *proc_get_parent_data(const struct inode *); extern void proc_remove(struct proc_dir_entry *); extern void remove_proc_entry(const char *, struct proc_dir_entry *); @@ -202,7 +200,7 @@ proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, static inline void proc_set_size(struct proc_dir_entry *de, loff_t size) {} static inline void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) {} -static inline void *PDE_DATA(const struct inode *inode) {BUG(); return NULL;} +static inline void *pde_data(const struct inode *inode) {BUG(); return NULL;} static inline void *proc_get_parent_data(const struct inode *inode) { BUG(); return NULL; } static inline void proc_remove(struct proc_dir_entry *de) {} diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 72dbb44a4573..88cc16444b43 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -209,7 +209,7 @@ static const struct file_operations __name ## _fops = { \ #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ - return single_open(file, __name ## _show, PDE_DATA(inode)); \ + return single_open(file, __name ## _show, pde_data(inode)); \ } \ \ static const struct proc_ops __name ## _proc_ops = { \ -- cgit v1.2.3 From 2dba5eb1c73b6ba2988ced07250edeac0f8cbf5a Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 21 Jan 2022 22:14:27 -0800 Subject: lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() Currently, enabling CONFIG_STACKDEPOT means its stack_table will be allocated from memblock, even if stack depot ends up not actually used. The default size of stack_table is 4MB on 32-bit, 8MB on 64-bit. This is fine for use-cases such as KASAN which is also a config option and has overhead on its own. But it's an issue for functionality that has to be actually enabled on boot (page_owner) or depends on hardware (GPU drivers) and thus the memory might be wasted. This was raised as an issue [1] when attempting to add stackdepot support for SLUB's debug object tracking functionality. It's common to build kernels with CONFIG_SLUB_DEBUG and enable slub_debug on boot only when needed, or create only specific kmem caches with debugging for testing purposes. It would thus be more efficient if stackdepot's table was allocated only when actually going to be used. This patch thus makes the allocation (and whole stack_depot_init() call) optional: - Add a CONFIG_STACKDEPOT_ALWAYS_INIT flag to keep using the current well-defined point of allocation as part of mem_init(). Make CONFIG_KASAN select this flag. - Other users have to call stack_depot_init() as part of their own init when it's determined that stack depot will actually be used. This may depend on both config and runtime conditions. Convert current users which are page_owner and several in the DRM subsystem. Same will be done for SLUB later. - Because the init might now be called after the boot-time memblock allocation has given all memory to the buddy allocator, change stack_depot_init() to allocate stack_table with kvmalloc() when memblock is no longer available. Also handle allocation failure by disabling stackdepot (could have theoretically happened even with memblock allocation previously), and don't unnecessarily align the memblock allocation to its own size anymore. [1] https://lore.kernel.org/all/CAMuHMdW=eoVzM1Re5FVoEN87nKfiLmM2+Ah7eNu2KXEhCvbZyA@mail.gmail.com/ Link: https://lkml.kernel.org/r/20211013073005.11351-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Dmitry Vyukov Reviewed-by: Marco Elver # stackdepot Cc: Marco Elver Cc: Vijayanand Jitta Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Geert Uytterhoeven Cc: Oliver Glitta Cc: Imran Khan From: Colin Ian King Subject: lib/stackdepot: fix spelling mistake and grammar in pr_err message There is a spelling mistake of the work allocation so fix this and re-phrase the message to make it easier to read. Link: https://lkml.kernel.org/r/20211015104159.11282-1-colin.king@canonical.com Signed-off-by: Colin Ian King Cc: Vlastimil Babka From: Vlastimil Babka Subject: lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() - fixup On FLATMEM, we call page_ext_init_flatmem_late() just before kmem_cache_init() which means stack_depot_init() (called by page owner init) will not recognize properly it should use kvmalloc() and not memblock_alloc(). memblock_alloc() will also not issue a warning and return a block memory that can be invalid and cause kernel page fault when saving stacks, as reported by the kernel test robot [1]. Fix this by moving page_ext_init_flatmem_late() below kmem_cache_init() so that slab_is_available() is true during stack_depot_init(). SPARSEMEM doesn't have this issue, as it doesn't do page_ext_init_flatmem_late(), but a different page_ext_init() even later in the boot process. Thanks to Mike Rapoport for pointing out the FLATMEM init ordering issue. While at it, also actually resolve a checkpatch warning in stack_depot_init() from DRM CI, which was supposed to be in the original patch already. [1] https://lore.kernel.org/all/20211014085450.GC18719@xsang-OptiPlex-9020/ Link: https://lkml.kernel.org/r/6abd9213-19a9-6d58-cedc-2414386d2d81@suse.cz Signed-off-by: Vlastimil Babka Reported-by: kernel test robot Cc: Mike Rapoport Cc: Stephen Rothwell From: Vlastimil Babka Subject: lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() - fixup3 Due to cd06ab2fd48f ("drm/locking: add backtrace for locking contended locks without backoff") landing recently to -next adding a new stack depot user in drivers/gpu/drm/drm_modeset_lock.c we need to add an appropriate call to stack_depot_init() there as well. Link: https://lkml.kernel.org/r/2a692365-cfa1-64f2-34e0-8aa5674dce5e@suse.cz Signed-off-by: Vlastimil Babka Cc: Jani Nikula Cc: Naresh Kamboju Cc: Marco Elver Cc: Vijayanand Jitta Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Geert Uytterhoeven Cc: Oliver Glitta Cc: Imran Khan Cc: Stephen Rothwell From: Vlastimil Babka Subject: lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() - fixup4 Due to 4e66934eaadc ("lib: add reference counting tracking infrastructure") landing recently to net-next adding a new stack depot user in lib/ref_tracker.c we need to add an appropriate call to stack_depot_init() there as well. Link: https://lkml.kernel.org/r/45c1b738-1a2f-5b5f-2f6d-86fab206d01c@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: Eric Dumazet Cc: Jiri Slab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ref_tracker.h | 2 ++ include/linux/stackdepot.h | 25 ++++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index c11c9db5825c..60f3453be23e 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -4,6 +4,7 @@ #include #include #include +#include struct ref_tracker; @@ -26,6 +27,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, spin_lock_init(&dir->lock); dir->quarantine_avail = quarantine_count; refcount_set(&dir->untracked, 1); + stack_depot_init(); } void ref_tracker_dir_exit(struct ref_tracker_dir *dir); diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index c34b55a6e554..17f992fe6355 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -19,6 +19,22 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags, bool can_alloc); +/* + * Every user of stack depot has to call this during its own init when it's + * decided that it will be calling stack_depot_save() later. + * + * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot + * enabled as part of mm_init(), for subsystems where it's known at compile time + * that stack depot will be used. + */ +int stack_depot_init(void); + +#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT +static inline int stack_depot_early_init(void) { return stack_depot_init(); } +#else +static inline int stack_depot_early_init(void) { return 0; } +#endif + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); @@ -30,13 +46,4 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, void stack_depot_print(depot_stack_handle_t stack); -#ifdef CONFIG_STACKDEPOT -int stack_depot_init(void); -#else -static inline int stack_depot_init(void) -{ - return 0; -} -#endif /* CONFIG_STACKDEPOT */ - #endif -- cgit v1.2.3 From 0a4ee518185e902758191d968600399f3bc2be31 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:14:34 -0800 Subject: mm: remove cleancache Patch series "remove Xen tmem leftovers". Since the removal of the Xen tmem driver in 2019, the cleancache hooks are entirely unused, as are large parts of frontswap. This series against linux-next (with the folio changes included) removes cleancaches, and cuts down frontswap to the bits actually used by zswap. This patch (of 13): The cleancache subsystem is unused since the removal of Xen tmem driver in commit 814bbf49dcd0 ("xen: remove tmem driver"). [akpm@linux-foundation.org: remove now-unreachable code] Link: https://lkml.kernel.org/r/20211224062246.1258487-1-hch@lst.de Link: https://lkml.kernel.org/r/20211224062246.1258487-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Acked-by: Geert Uytterhoeven Cc: Konrad Rzeszutek Wilk Cc: Hugh Dickins Cc: Seth Jennings Cc: Dan Streetman Cc: Vitaly Wool Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cleancache.h | 124 --------------------------------------------- include/linux/fs.h | 5 -- 2 files changed, 129 deletions(-) delete mode 100644 include/linux/cleancache.h (limited to 'include') diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h deleted file mode 100644 index 5f5730c1d324..000000000000 --- a/include/linux/cleancache.h +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_CLEANCACHE_H -#define _LINUX_CLEANCACHE_H - -#include -#include -#include - -#define CLEANCACHE_NO_POOL -1 -#define CLEANCACHE_NO_BACKEND -2 -#define CLEANCACHE_NO_BACKEND_SHARED -3 - -#define CLEANCACHE_KEY_MAX 6 - -/* - * cleancache requires every file with a page in cleancache to have a - * unique key unless/until the file is removed/truncated. For some - * filesystems, the inode number is unique, but for "modern" filesystems - * an exportable filehandle is required (see exportfs.h) - */ -struct cleancache_filekey { - union { - ino_t ino; - __u32 fh[CLEANCACHE_KEY_MAX]; - u32 key[CLEANCACHE_KEY_MAX]; - } u; -}; - -struct cleancache_ops { - int (*init_fs)(size_t); - int (*init_shared_fs)(uuid_t *uuid, size_t); - int (*get_page)(int, struct cleancache_filekey, - pgoff_t, struct page *); - void (*put_page)(int, struct cleancache_filekey, - pgoff_t, struct page *); - void (*invalidate_page)(int, struct cleancache_filekey, pgoff_t); - void (*invalidate_inode)(int, struct cleancache_filekey); - void (*invalidate_fs)(int); -}; - -extern int cleancache_register_ops(const struct cleancache_ops *ops); -extern void __cleancache_init_fs(struct super_block *); -extern void __cleancache_init_shared_fs(struct super_block *); -extern int __cleancache_get_page(struct page *); -extern void __cleancache_put_page(struct page *); -extern void __cleancache_invalidate_page(struct address_space *, struct page *); -extern void __cleancache_invalidate_inode(struct address_space *); -extern void __cleancache_invalidate_fs(struct super_block *); - -#ifdef CONFIG_CLEANCACHE -#define cleancache_enabled (1) -static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping) -{ - return mapping->host->i_sb->cleancache_poolid >= 0; -} -static inline bool cleancache_fs_enabled(struct page *page) -{ - return cleancache_fs_enabled_mapping(page->mapping); -} -#else -#define cleancache_enabled (0) -#define cleancache_fs_enabled(_page) (0) -#define cleancache_fs_enabled_mapping(_page) (0) -#endif - -/* - * The shim layer provided by these inline functions allows the compiler - * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE - * is disabled, to a single global variable check if CONFIG_CLEANCACHE - * is enabled but no cleancache "backend" has dynamically enabled it, - * and, for the most frequent cleancache ops, to a single global variable - * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled - * and a cleancache backend has dynamically enabled cleancache, but the - * filesystem referenced by that cleancache op has not enabled cleancache. - * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially - * no measurable performance impact. - */ - -static inline void cleancache_init_fs(struct super_block *sb) -{ - if (cleancache_enabled) - __cleancache_init_fs(sb); -} - -static inline void cleancache_init_shared_fs(struct super_block *sb) -{ - if (cleancache_enabled) - __cleancache_init_shared_fs(sb); -} - -static inline int cleancache_get_page(struct page *page) -{ - if (cleancache_enabled && cleancache_fs_enabled(page)) - return __cleancache_get_page(page); - return -1; -} - -static inline void cleancache_put_page(struct page *page) -{ - if (cleancache_enabled && cleancache_fs_enabled(page)) - __cleancache_put_page(page); -} - -static inline void cleancache_invalidate_page(struct address_space *mapping, - struct page *page) -{ - /* careful... page->mapping is NULL sometimes when this is called */ - if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) - __cleancache_invalidate_page(mapping, page); -} - -static inline void cleancache_invalidate_inode(struct address_space *mapping) -{ - if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) - __cleancache_invalidate_inode(mapping); -} - -static inline void cleancache_invalidate_fs(struct super_block *sb) -{ - if (cleancache_enabled) - __cleancache_invalidate_fs(sb); -} - -#endif /* _LINUX_CLEANCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 9617dea24978..f3daaea16554 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1535,11 +1535,6 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ - /* - * Saved pool identifier for cleancache (-1 means none) - */ - int cleancache_poolid; - struct shrinker s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ -- cgit v1.2.3 From 3d6035f136009f9cae380022754cba31f32570c5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:14:38 -0800 Subject: frontswap: remove frontswap_writethrough frontswap_writethrough is never called, so remove it. Link: https://lkml.kernel.org/r/20211224062246.1258487-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Cc: Dan Streetman Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Konrad Rzeszutek Wilk Cc: Matthew Wilcox (Oracle) Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index b07d88c92bb2..4a03fda41572 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -26,7 +26,6 @@ struct frontswap_ops { extern void frontswap_register_ops(struct frontswap_ops *ops); extern void frontswap_shrink(unsigned long); extern unsigned long frontswap_curr_pages(void); -extern void frontswap_writethrough(bool); #define FRONTSWAP_HAS_EXCLUSIVE_GETS extern void frontswap_tmem_exclusive_gets(bool); -- cgit v1.2.3 From 71024cb4a0bfe7767aec7a128d0a1a13a37b7fcd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:14:41 -0800 Subject: frontswap: remove frontswap_tmem_exclusive_gets frontswap_tmem_exclusive_gets is never called, so remove it. Link: https://lkml.kernel.org/r/20211224062246.1258487-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Cc: Dan Streetman Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Konrad Rzeszutek Wilk Cc: Matthew Wilcox (Oracle) Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 4a03fda41572..83a56392cc7f 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -26,8 +26,6 @@ struct frontswap_ops { extern void frontswap_register_ops(struct frontswap_ops *ops); extern void frontswap_shrink(unsigned long); extern unsigned long frontswap_curr_pages(void); -#define FRONTSWAP_HAS_EXCLUSIVE_GETS -extern void frontswap_tmem_exclusive_gets(bool); extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); extern void __frontswap_init(unsigned type, unsigned long *map); -- cgit v1.2.3 From 0b364446d734da76e421dbfb09e5268270cefaf0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:14:44 -0800 Subject: frontswap: remove frontswap_shrink frontswap_shrink is never called, so remove it. Link: https://lkml.kernel.org/r/20211224062246.1258487-5-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Cc: Dan Streetman Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Konrad Rzeszutek Wilk Cc: Matthew Wilcox (Oracle) Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 83a56392cc7f..d268d7bb6513 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -24,7 +24,6 @@ struct frontswap_ops { }; extern void frontswap_register_ops(struct frontswap_ops *ops); -extern void frontswap_shrink(unsigned long); extern unsigned long frontswap_curr_pages(void); extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); -- cgit v1.2.3 From 3e8e1af63d7a831f576477c25d9b89049bd2d53d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:14:47 -0800 Subject: frontswap: remove frontswap_curr_pages frontswap_curr_pages is never called, so remove it. Link: https://lkml.kernel.org/r/20211224062246.1258487-6-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Cc: Dan Streetman Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Konrad Rzeszutek Wilk Cc: Matthew Wilcox (Oracle) Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index d268d7bb6513..5205c2977b20 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -24,7 +24,6 @@ struct frontswap_ops { }; extern void frontswap_register_ops(struct frontswap_ops *ops); -extern unsigned long frontswap_curr_pages(void); extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); extern void __frontswap_init(unsigned type, unsigned long *map); -- cgit v1.2.3 From 1cf53c894d15dd4b73397a56fa055d76d3db66b4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:14:51 -0800 Subject: frontswap: simplify frontswap_init Just use IS_ENABLED() and remove the __frontswap_init indirection. Also remove the unused export. Link: https://lkml.kernel.org/r/20211224062246.1258487-7-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Cc: Dan Streetman Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Konrad Rzeszutek Wilk Cc: Matthew Wilcox (Oracle) Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 5205c2977b20..73d7beb44f2b 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -26,7 +26,7 @@ struct frontswap_ops { extern void frontswap_register_ops(struct frontswap_ops *ops); extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); -extern void __frontswap_init(unsigned type, unsigned long *map); +extern void frontswap_init(unsigned type, unsigned long *map); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); @@ -107,11 +107,4 @@ static inline void frontswap_invalidate_area(unsigned type) __frontswap_invalidate_area(type); } -static inline void frontswap_init(unsigned type, unsigned long *map) -{ -#ifdef CONFIG_FRONTSWAP - __frontswap_init(type, map); -#endif -} - #endif /* _LINUX_FRONTSWAP_H */ -- cgit v1.2.3 From 10a9c496789fe2098bfc018650fc77b23ba08a54 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:14:57 -0800 Subject: mm: simplify try_to_unuse Remove the unused frontswap and pages_to_unuse arguments, and mark the function static now that the caller in frontswap is gone. [akpm@linux-foundation.org: fix shmem_unuse() stub, per Matthew] Link: https://lkml.kernel.org/r/20211224062246.1258487-9-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Cc: Dan Streetman Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Konrad Rzeszutek Wilk Cc: Matthew Wilcox (Oracle) Cc: Seth Jennings Cc: Vitaly Wool Cc: Naresh Kamboju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 7 ------- include/linux/shmem_fs.h | 3 +-- include/linux/swapfile.h | 1 - 3 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 73d7beb44f2b..a9817d4fa74c 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -7,13 +7,6 @@ #include #include -/* - * Return code to denote that requested number of - * frontswap pages are unused(moved to page cache). - * Used in shmem_unuse and try_to_unuse. - */ -#define FRONTSWAP_PAGES_UNUSED 2 - struct frontswap_ops { void (*init)(unsigned); /* this swap type was just swapon'ed */ int (*store)(unsigned, pgoff_t, struct page *); /* store a page */ diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 166158b6e917..e65b80ed09e7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -83,8 +83,7 @@ extern void shmem_unlock_mapping(struct address_space *mapping); extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); -extern int shmem_unuse(unsigned int type, bool frontswap, - unsigned long *fs_pages_to_unuse); +int shmem_unuse(unsigned int type); extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, pgoff_t index); diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index e06febf62978..809cd01ef2c5 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -9,7 +9,6 @@ extern spinlock_t swap_lock; extern struct plist_head swap_active_head; extern struct swap_info_struct *swap_info[]; -extern int try_to_unuse(unsigned int, bool, unsigned long); extern unsigned long generic_max_swapfile_size(void); extern unsigned long max_swapfile_size(void); -- cgit v1.2.3 From bd9cd521496ba8d537d8f46f4167bf4221aba9a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:15:01 -0800 Subject: frontswap: remove frontswap_test frontswap_test is unused now, remove it. Link: https://lkml.kernel.org/r/20211224062246.1258487-10-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Cc: Dan Streetman Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Konrad Rzeszutek Wilk Cc: Matthew Wilcox (Oracle) Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index a9817d4fa74c..c5b2848d2240 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -18,7 +18,6 @@ struct frontswap_ops { extern void frontswap_register_ops(struct frontswap_ops *ops); -extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); extern void frontswap_init(unsigned type, unsigned long *map); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); @@ -33,11 +32,6 @@ static inline bool frontswap_enabled(void) return static_branch_unlikely(&frontswap_enabled_key); } -static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) -{ - return __frontswap_test(sis, offset); -} - static inline void frontswap_map_set(struct swap_info_struct *p, unsigned long *map) { @@ -56,11 +50,6 @@ static inline bool frontswap_enabled(void) return false; } -static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) -{ - return false; -} - static inline void frontswap_map_set(struct swap_info_struct *p, unsigned long *map) { -- cgit v1.2.3 From 633423a09cb5cfe61438283e1ce49c23cf4a0611 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:15:07 -0800 Subject: mm: mark swap_lock and swap_active_head static swap_lock and swap_active_head are only used in swapfile.c, so mark them static. Link: https://lkml.kernel.org/r/20211224062246.1258487-12-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Cc: Dan Streetman Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Konrad Rzeszutek Wilk Cc: Matthew Wilcox (Oracle) Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swapfile.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index 809cd01ef2c5..54078542134c 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -6,8 +6,6 @@ * these were static in swapfile.c but frontswap.c needs them and we don't * want to expose them to the dozens of source files that include swap.h */ -extern spinlock_t swap_lock; -extern struct plist_head swap_active_head; extern struct swap_info_struct *swap_info[]; extern unsigned long generic_max_swapfile_size(void); extern unsigned long max_swapfile_size(void); -- cgit v1.2.3 From 1da0d94a3ec8c5f3793b7be8538b55e60ebeefe3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Jan 2022 22:15:10 -0800 Subject: frontswap: remove support for multiple ops There is only a single instance of frontswap ops in the kernel, so simplify the frontswap code by removing support for multiple operations. Link: https://lkml.kernel.org/r/20211224062246.1258487-13-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Juergen Gross Cc: Dan Streetman Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Konrad Rzeszutek Wilk Cc: Matthew Wilcox (Oracle) Cc: Seth Jennings Cc: Vitaly Wool Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index c5b2848d2240..a631bac12220 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -13,10 +13,9 @@ struct frontswap_ops { int (*load)(unsigned, pgoff_t, struct page *); /* load a page */ void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */ void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */ - struct frontswap_ops *next; /* private pointer to next ops */ }; -extern void frontswap_register_ops(struct frontswap_ops *ops); +int frontswap_register_ops(const struct frontswap_ops *ops); extern void frontswap_init(unsigned type, unsigned long *map); extern int __frontswap_store(struct page *page); -- cgit v1.2.3 From 25d490eb46486e88c16e64d9eb7cfd33a642d596 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Sat, 18 Dec 2021 09:30:40 +0100 Subject: ARM: 9172/1: amba: Cleanup amba pclk operation There is no user about amba_pclk_[un]prepare() besides pl330.c, directly use clk_[un]prepare(). After this, all the function about amba pclk operation, enable, disable, [un]prepare could be killed. Acked-by: Vinod Koul Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- include/linux/amba/bus.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6c7f47846971..09174970b855 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -121,26 +121,6 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -static inline int amba_pclk_enable(struct amba_device *dev) -{ - return clk_enable(dev->pclk); -} - -static inline void amba_pclk_disable(struct amba_device *dev) -{ - clk_disable(dev->pclk); -} - -static inline int amba_pclk_prepare(struct amba_device *dev) -{ - return clk_prepare(dev->pclk); -} - -static inline void amba_pclk_unprepare(struct amba_device *dev) -{ - clk_unprepare(dev->pclk); -} - /* Some drivers don't use the struct amba_device */ #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff) #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f) -- cgit v1.2.3 From dacf3ca134d0dc105caee77651a349a86bd77456 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Sat, 18 Dec 2021 09:30:41 +0100 Subject: ARM: 9173/1: amba: kill amba_find_match() There is no one use amba_find_match(), kill it. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- include/linux/amba/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 09174970b855..6562f543c3e0 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -117,7 +117,6 @@ void amba_device_put(struct amba_device *); int amba_device_add(struct amba_device *, struct resource *); int amba_device_register(struct amba_device *, struct resource *); void amba_device_unregister(struct amba_device *); -struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int); int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -- cgit v1.2.3